## Load libraries
library(covid19)
library(ggplot2)
library(lubridate)
library(dplyr)
library(ggplot2)
library(sp)
library(raster)
library(viridis)
library(ggthemes)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(RColorBrewer)
library(readr)
library(zoo)
library(tidyr)
options(scipen = '999')
# Tests
pd <- testing
entity_split <- strsplit(pd$Entity, split = ' - ')
pd$country <- unlist(lapply(entity_split, function(x){x[1]}))
pd$key <- unlist(lapply(entity_split, function(x){x[2]}))
pd <- pd %>% filter(key == 'tests performed')
pd <- pd %>% left_join(world_pop) %>%
  filter(sub_region %in% c('Sub-Saharan Africa', 'Southern Europe', 'Northern Europe', 'Western Europe'))

ggplot(data = pd,
       aes(x = Date,
           y = `Cumulative total`,
           group = country)) +
  geom_line(aes(color = country))

pd = esp_df %>%
  left_join(esp_pop) %>%
  mutate(p = deaths_non_cum / pop * 1000000)

ggplot(data = pd,
       aes(x = date,
           y = p)) +
  # geom_step() +
  geom_bar(stat = 'identity',
           fill = 'red',
           alpha = 0.6,
           color = NA) +
  # geom_ribbon(aes(x = date, ymin = 0, ymax = p), data = pd, fill = 'blue') +
  facet_wrap(~ccaa) +
  theme_minimal() +
  labs(x = 'Date',
       y = 'Daily deaths per 1,000,000',
       title = 'Daily deaths per 1,000,000 population')

Daily cases Spain

pd <- df_country %>%
  filter(country == 'Spain')

ggplot(data = pd,
       aes(x = date,
           y = cases_non_cum)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  labs(x = 'Fecha',
       y = 'Casos diarios',
       title = 'Casos confirmados nuevos')

Daily deahts in Spain

pd <- df_country %>%
  filter(country == 'Spain')

ggplot(data = pd,
       aes(x = date,
           y = deaths_non_cum)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  labs(x = 'Fecha',
       y = 'Muertes diarias',
       title = 'Muertes')

Daily cases world / population-adjusted

covid19::plot_day_zero(countries = c('Italy', 'Spain', 'US', 'Germany',
                                     'Canada', 'UK', 'Netherlands'
                                     ),
                       ylog = F,
                       day0 = 1,
                       cumulative = F,
                       calendar = T,
                       pop = T,
                       point_alpha = 0,
                       color_var = 'geo')

Cases per pop last week

pd <- df_country %>%
  left_join(world_pop %>% dplyr::select(iso, pop)) %>%
  group_by(country) %>%
  mutate(max_date = max(date)) %>%
  mutate(week_ago = max_date - 6) %>%
  # filter(date == max(date)) %>%
  filter(date >= week_ago, date <= max_date) %>%
  group_by(country) %>%
  summarise(y = sum(cases_non_cum),
            pop = dplyr::first(pop),
            date_range = paste0(min(date), ' - ', max(date)),
            yp = sum(cases_non_cum) / dplyr::first(pop) * 1000000) %>%
  ungroup %>%
  filter(pop > 1000000) %>%
  arrange(desc(yp)) %>%
  head(15) %>%
  mutate(country = ifelse(country == 'United Kingdom', 'UK', country))
pd$country <- factor(pd$country, levels = unique(pd$country))

ggplot(data = pd,
       aes(x = country,
           y = yp)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) +
  geom_text(aes(label = round(yp, digits = 0)),
            nudge_y = -50,
            color = 'white') +
  labs(x = '',
       y = 'Cases per 1,000,000 (last 7 days)',
       title = 'New confirmed COVID-19 cases per million population, last 7 days')

Deaths per pop last week

pd <- df_country %>%
  left_join(world_pop %>% dplyr::select(iso, pop)) %>%
  group_by(country) %>%
  mutate(max_date = max(date)) %>%
  mutate(week_ago = max_date - 6) %>%
  # filter(date == max(date)) %>%
  filter(date >= week_ago, date <= max_date) %>%
  group_by(country) %>%
  summarise(y = sum(deaths_non_cum),
            pop = dplyr::first(pop),
            date_range = paste0(min(date), ' - ', max(date)),
            yp = sum(deaths_non_cum) / dplyr::first(pop) * 1000000) %>%
  ungroup %>%
  filter(pop > 1000000) %>%
  arrange(desc(yp)) %>%
  head(10) %>%
  mutate(country = ifelse(country == 'United Kingdom', 'UK', country))
pd$country <- factor(pd$country, levels = unique(pd$country))

ggplot(data = pd,
       aes(x = country,
           y = yp)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) +
  geom_text(aes(label = round(yp, digits = 0)),
            nudge_y = -10,
            color = 'white') +
  labs(x = '',
       y = 'Deaths per 1,000,000 (last 7 days)',
       title = 'New confirmed COVID-19 deaths per million population, last 7 days')

Lombardia, Catalonia, Madrid

New cases in last week

pd <- esp_df %>%
  left_join(esp_pop %>% dplyr::select(ccaa, pop)) %>%
  mutate(country = 'Spain') %>%
  bind_rows(
    ita %>% left_join(ita_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'Italy')
  ) %>%
  bind_rows(
    df %>% filter(country == 'US') %>% mutate(ccaa = district) %>% left_join(regions_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'US')) %>%
  group_by(ccaa) %>%
  mutate(max_date = max(date)) %>%
  mutate(week_ago = max_date - 6) %>%
  # filter(date == max(date)) %>%
  filter(date >= week_ago, date <= max_date) %>%
  group_by(ccaa) %>%
  summarise(y = sum(cases_non_cum),
            country = dplyr::first(country),
            pop = dplyr::first(pop),
            date_range = paste0(min(date), ' - ', max(date))) %>%
  ungroup %>%
  mutate(yp = y / pop * 1000000) %>%
  ungroup %>%
  # filter(pop > 1000000) %>%
  arrange(desc(yp)) 

#Get country totals
pd %>%
  group_by(country) %>%
  summarise(y = sum(y, na.rm = T),
            pop = sum(pop, na.rm = T)) %>%
  ungroup %>%
  mutate(yp = y / pop * 1000000)
# A tibble: 3 x 4
  country      y       pop    yp
  <chr>    <dbl>     <dbl> <dbl>
1 Italy    18186  60491453  301.
2 Spain    25541  47026208  543.
3 US      203882 328239523  621.
library(knitr)
library(kableExtra)
pd <- pd %>%
  mutate(yp = round(yp, digits = 1)) %>%
  mutate(Rank = 1:nrow(pd)) %>%
  dplyr::select(Rank, 
                Región = ccaa,
                `Casos nuevos, 7 días` = y,
                Población = pop,
                `Casos nuevos 7 días por millón` = yp)
kable(pd) %>%
  kable_styling("striped", full_width = F) %>%
  column_spec(which(names(pd) == 'Casos nuevos 7 días por millón'), bold = T) %>%
  row_spec(which(pd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
Rank Región Casos nuevos, 7 días Población Casos nuevos 7 días por millón
1 Massachusetts 18385 6892503 2667.4
2 New Jersey 22466 8882190 2529.3
3 Rhode Island 2618 1059361 2471.3
4 New York 38936 19453561 2001.5
5 La Rioja 614 316798 1938.1
6 Connecticut 6182 3565287 1733.9
7 Delaware 1417 973764 1455.2
8 District of Columbia 965 705749 1367.3
9 CyL 3113 2399548 1297.3
10 Navarra 799 654214 1221.3
11 CLM 2473 2032863 1216.5
12 Illinois 14370 12671821 1134.0
13 Maryland 5803 6045680 959.9
14 País Vasco 1994 2207776 903.2
15 Madrid 5992 6663394 899.2
16 Nebraska 1710 1934408 884.0
17 Iowa 2709 3155070 858.6
18 Piemonte 3749 4376000 856.7
19 Pennsylvania 9644 12801989 753.3
20 Colorado 4149 5758736 720.5
21 Cataluña 5046 7675217 657.4
22 Lombardia 6508 10040000 648.2
23 Indiana 4273 6732219 634.7
24 South Dakota 559 884659 631.9
25 Liguria 973 1557000 624.9
26 Michigan 6210 9986857 621.8
27 Louisiana 2545 4648794 547.5
28 Virginia 4548 8535519 532.8
29 Mississippi 1582 2976149 531.6
30 Kansas 1425 2913314 489.1
31 Cantabria 278 581078 478.4
32 Trentino-Alto Adige 507 1070000 473.8
33 Aragón 619 1319291 469.2
34 Georgia 4895 10617423 461.0
35 North Dakota 315 762062 413.4
36 New Mexico 854 2096829 407.3
37 Emilia-Romagna 1795 4453000 403.1
38 Tennessee 2680 6829174 392.4
39 Arkansas 1096 3017804 363.2
40 New Hampshire 491 1359711 361.1
41 Wyoming 203 578759 350.8
42 Utah 1023 3205958 319.1
43 Galicia 860 2699499 318.6
44 Asturias 323 1022800 315.8
45 Alabama 1460 4903185 297.8
46 Veneto 1452 4905000 296.0
47 Ohio 3406 11689100 291.4
48 California 11280 39512223 285.5
49 Nevada 870 3080156 282.5
50 Wisconsin 1582 5822434 271.7
51 C. Valenciana 1259 5003769 251.6
52 Kentucky 1106 4467673 247.6
53 North Carolina 2533 10488084 241.5
54 Minnesota 1341 5639632 237.8
55 Florida 5079 21477737 236.5
56 Ceuta 20 84777 235.9
57 Missouri 1415 6137428 230.6
58 Extremadura 244 1067710 228.5
59 Arizona 1657 7278717 227.7
60 South Carolina 1167 5148714 226.7
61 Washington 1572 7614893 206.4
62 Abruzzo 262 1315000 199.2
63 Marche 301 1532000 196.5
64 Texas 5570 28995881 192.1
65 Andalucía 1561 8414240 185.5
66 Valle d’Aosta 23 126202 182.2
67 Toscana 672 3737000 179.8
68 Melilla 15 86487 173.4
69 Friuli Venezia Giulia 202 1216000 166.1
70 Oklahoma 601 3956971 151.9
71 Idaho 225 1787065 125.9
72 Baleares 132 1149460 114.8
73 Maine 148 1344212 110.1
74 Lazio 577 5897000 97.8
75 Puglia 391 4048000 96.6
76 Oregon 397 4217737 94.1
77 West Virginia 161 1792147 89.8
78 Murcia 97 1493898 64.9
79 Sicilia 326 5027000 64.8
80 Vermont 39 623989 62.5
81 Molise 15 308493 48.6
82 Canarias 102 2153389 47.4
83 Campania 275 5827000 47.2
84 Basilicata 24 567118 42.3
85 Sardegna 55 1648000 33.4
86 Alaska 24 731545 32.8
87 Calabria 58 1957000 29.6
88 Umbria 21 884640 23.7
89 Hawaii 23 1415872 16.2
90 Montana 16 1068778 15.0
91 American Samoa 0 NA NA
92 Diamond Princess 0 NA NA
93 Grand Princess 0 NA NA
94 Guam 5 NA NA
95 Northern Mariana Islands 0 NA NA
96 Puerto Rico 137 NA NA
97 Recovered 0 NA NA
98 United States Virgin Islands 6 NA NA
99 US 1 NA NA
100 Virgin Islands 4 NA NA
101 Wuhan Evacuee 4 NA NA

Just Italy and Spain

xpd = pd %>% filter(`Región` %in% c(ita$ccaa, esp_df$ccaa))
xpd$Rank <- 1:nrow(xpd)

kable(xpd) %>%
  kable_styling("striped", full_width = F) %>%
  column_spec(which(names(xpd) == 'Casos nuevos 7 días por millón'), bold = T) %>%
  row_spec(which(xpd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
Rank Región Casos nuevos, 7 días Población Casos nuevos 7 días por millón
1 La Rioja 614 316798 1938.1
2 CyL 3113 2399548 1297.3
3 Navarra 799 654214 1221.3
4 CLM 2473 2032863 1216.5
5 País Vasco 1994 2207776 903.2
6 Madrid 5992 6663394 899.2
7 Piemonte 3749 4376000 856.7
8 Cataluña 5046 7675217 657.4
9 Lombardia 6508 10040000 648.2
10 Liguria 973 1557000 624.9
11 Cantabria 278 581078 478.4
12 Trentino-Alto Adige 507 1070000 473.8
13 Aragón 619 1319291 469.2
14 Emilia-Romagna 1795 4453000 403.1
15 Galicia 860 2699499 318.6
16 Asturias 323 1022800 315.8
17 Veneto 1452 4905000 296.0
18 C. Valenciana 1259 5003769 251.6
19 Ceuta 20 84777 235.9
20 Extremadura 244 1067710 228.5
21 Abruzzo 262 1315000 199.2
22 Marche 301 1532000 196.5
23 Andalucía 1561 8414240 185.5
24 Valle d’Aosta 23 126202 182.2
25 Toscana 672 3737000 179.8
26 Melilla 15 86487 173.4
27 Friuli Venezia Giulia 202 1216000 166.1
28 Baleares 132 1149460 114.8
29 Lazio 577 5897000 97.8
30 Puglia 391 4048000 96.6
31 Murcia 97 1493898 64.9
32 Sicilia 326 5027000 64.8
33 Molise 15 308493 48.6
34 Canarias 102 2153389 47.4
35 Campania 275 5827000 47.2
36 Basilicata 24 567118 42.3
37 Sardegna 55 1648000 33.4
38 Calabria 58 1957000 29.6
39 Umbria 21 884640 23.7
x = esp_df %>% left_join(esp_pop) %>%
  bind_rows(ita %>% left_join(ita_pop)) %>%
  filter(date == '2020-04-09') %>%
  filter(ccaa %in% c('Madrid',
                     'Cataluña', 'Lombardia')) %>%
  dplyr::select(ccaa, deaths, cases, pop) %>%
  mutate(deathsp = deaths / pop * 100000,
         casesp = cases / pop * 100000)


covid19::plot_day_zero(countries = c('Italy', 'Spain', 'China', 'South Korea', 'Sinagpore'),
                       districts = c('Madrid', #'Hubei',
                     'Cataluña', 'Lombardia'),
                     by_district = T,
                     roll = 7,
                     deaths = F,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Casos diaris (mitjana mòbil de 7 dies)',
       title = 'Casos diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') 

covid19::plot_day_zero(countries = c('Italy', 'Spain'),
                     roll = 7,
                     deaths = F,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)',
       title = 'Casos diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') +
  theme(legend.direction = 'horizontal',
        legend.position = 'top')

covid19::plot_day_zero(countries = c('Italy', 'Spain'),
                     roll = 7,
                     deaths = T,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Morts diaris per 1.000.000 (mitjana mòbil de 7 dies)',
       title = 'Morts diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') +
  theme(legend.direction = 'horizontal',
        legend.position = 'top')

covid19::plot_day_zero(countries = c('Italy', 'Spain'),
                       by_district = T,
                       districts = c('Madrid', 'Emilia-Romagna',
                     'Cataluña', 'Lombardia'),
                     roll = 7,
                     deaths = F,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)',
       title = 'Casos diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') +
  theme(legend.direction = 'horizontal',
        legend.position = 'top')

Asia

covid19::plot_day_zero(countries = c('Japan', 'South Korea', 'Singapore', 'Hong Kong'),
                     roll = 7,
                     deaths = F,
                     # pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
    labs(x = 'Data',
       y = 'Casos diaris (mitjana mòbil de 3 dies)',
       title = 'Casos diaris',
       subtitle = 'Mitjana mòbil de 3 dies') +
  facet_wrap(~geo, scales = 'free_y') +
  theme(legend.position = 'none')

Day of week analysis

pd <- esp_df %>%
  arrange(date) %>%
  group_by(date) %>%
  summarise(deaths_non_cum = sum(deaths_non_cum),
            cases_non_cum = sum(cases_non_cum)) %>%
  ungroup %>%  
  mutate(dow = weekdays(date)) %>%
  mutate(week = isoweek(date)) %>%
  group_by(week) %>%
  mutate(start_date = min(date)) %>%
  ungroup %>%
  filter(date >= '2020-03-09')
pd$dow <- factor(pd$dow,
                 levels = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                            'Saturday', 'Sunday'),
                 labels = c('Lunes', 'Martes', 'Miércoles', 'Jueves', 'Viernes',
                            'Sábado', 'Domingo'))
n_cols <- length(unique(pd$start_date))
cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))(n_cols)
pd$start_date <- factor(pd$start_date)
ggplot(data = pd,
       aes(x = dow,
           y = cases_non_cum,
           group = week,
           color = start_date)) +
  geom_line(size = 4) +
  geom_point(size = 4) +
  scale_color_manual(name = 'Primer día\nde la semana',
                     values = cols) +
  theme_simple() +
  labs(x = 'Día de la semana',
       y = 'Muertes')

Basque country vs rest of Spain

pd <- esp_df %>%
  mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>%
  group_by(geo, date) %>%
  summarise(deaths = sum(deaths)) %>%
  ungroup
pp <- esp_pop %>%
    mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>%
  group_by(geo) %>%
  summarise(pop = sum(pop))
pd <- left_join(pd, pp) %>% mutate(pk = deaths / pop * 100000)

ggplot(data = pd %>% filter(pk > 0.1),
       aes(x = date,
           y = pk,
           color = geo)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative deaths per 100,000') +
  scale_color_manual(name = '',
                     values = c('red', 'black')) +
  theme_simple() 

Country trajectories, population adjusted

countries <- c(
  'Spain',
  'US',
  'France',
  # 'Portugal',
  'Italy',
  'China'
)
districts <- c('Lombardia', 'Cataluña', 
               'New York', 
               # 'Hubei',
               'CyL', 
               'CLM', 
               # 'Washington',
               'La Rioja',
               'Madrid')

plot_day_zero(countries = countries,
              districts = districts,
              ylog = F,
              day0 = 1,
              cumulative = F,
              time_before = 0,
              roll = 3,
              deaths = T,
              pop = T,
              by_district = T,
              point_alpha = 0,
              line_size = 3,
              color_var = 'geo')

Italy and Spain

dir.create('/tmp/totesmou')
plot_day_zero(countries = c('Spain', 'Italy', max_date = Sys.Date()-1),
              point_size = 2, calendar = T)

ggsave('/tmp/totesmou/1_italy_vs_spain.png',
       height = 5.6,
       width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F)

ggsave('/tmp/totesmou/2_italy_vs_spain_temps_ajustat.png',
       height = 5.6,
       width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = T, deaths = T, day0 = 10)

plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F, deaths = T, day0 = 10)

plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F, deaths = T, day0 = 10, pop = T)

plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F, deaths = T, day0 = 1, pop = T, roll = 3, roll_fun = 'mean')

plot_day_zero(countries = c('Spain', 'Italy'),
              districts = c('Cataluña', 'Madrid',
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              day0 = 10,
              pop = T)

plot_day_zero(countries = c('Spain', 'Italy'),
              districts = c('Cataluña', 'Madrid',
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              day0 = 1,
              pop = T)

plot_day_zero(countries = c('Spain', 'Italy'),
              districts = c('Cataluña', 'Madrid',
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              roll = 3,
              roll_fun = 'mean',
              day0 = 1,
              ylog = F,
              pop = T, calendar = T)

plot_day_zero(countries = c('Spain', 'Italy', 'US'),
              districts = c('Cataluña', 'Madrid', 
                            'New York', 
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              roll = 3,
              roll_fun = 'mean',
              day0 = 1,
              pop = T)

plot_day_zero(countries = c('Spain', 'Italy', 'US'),
              districts = c('Cataluña', 'Madrid', 
                            'New York', 
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              # roll = 7,
              roll_fun = 'mean',
              day0 = 1,
              pop = F)

plot_day_zero(color_var = 'iso', by_district = T,
              deaths = T,
              day0 = 1,
              alpha = 0.6,
              point_alpha = 0,
              calendar = T,
              countries = c('Spain', 'Italy'),
              pop = T)

place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Altres CCAA')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Altres regions italianes')
  # )
}
pd <- esp_df %>% mutate(country = 'Espanya') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Itàlia')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = c('Madrid',
                             # 'Cataluña',
                             'Altres CCAA',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Altres regions italianes'))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)

label_data <- pd %>%
  filter(country  %in% c('Itàlia', 'Espanya')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -9))
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day),
       aes(x = date,
           y = p_deaths_country,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  # geom_point(size = 3,
  #            aes(color = ccaa)) +
  facet_wrap(~country) +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Data',
       y = 'Percentatge de morts',
       title = 'Percentatge de morts: regió més afectada vs resta del pais',
       subtitle = 'A partir del primer día a cada país amb 50 morts acumulades') +
  theme(legend.position = 'top',
        strip.text = element_text(size = 30),
        legend.text = element_text(size = 10))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5)) +
  geom_text(data = label_data,
            aes(x = x-2,
                y = y,
                label = label,
                color = ccaa),
            size = 7,
            show.legend = FALSE)

# Same cahrt as previous, but one shared axis
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Rest of Spain')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Rest of Italy')
  # )
}
pd <- esp_df %>% mutate(country = 'Spain') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Italy')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = c('Madrid',
                             # 'Cataluña',
                             'Rest of Spain',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Rest of Italy'))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)

label_data <- pd %>%
  filter(country  %in% c('Italy', 'Spain')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  # mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -9)) %>%
  ungroup
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=30])) %>% filter(date >= start_day),
       aes(x = date,
           y = p_deaths_country,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  # geom_point(size = 3,
  #            aes(color = ccaa)) +
  # facet_wrap(~country, scales = 'free_x') +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Date',
       y = 'Percentage of deaths',
       title = 'Percentage of country\'s cumulative COVID-19 deaths by geography',
       subtitle = 'Starting at first day with 50 or more cumulative deaths') +
  theme(legend.position = 'top',
        strip.text = element_text(size = 30),
        legend.text = element_text(size = 10))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5))# +

  # geom_text(data = label_data,
  #           aes(x = x-2,
  #               y = y,
  #               label = label,
  #               color = ccaa),
  #           size = 7,
  #           show.legend = FALSE)

Same chart as above but absolute numbers

# Same cahrt as previous, but one shared axis
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Rest of Spain')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Rest of Italy')
  # )
}
pd <- esp_df %>% mutate(country = 'Spain') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Italy')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = rev(c('Madrid',
                             # 'Cataluña',
                             'Rest of Spain',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Rest of Italy')))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)

label_data <- pd %>%
  filter(country  %in% c('Italy', 'Spain')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  # mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -9)) %>%
  ungroup

# Get moving
ma <- function(x, n = 2){
    
    if(length(x) >= n){
      stats::filter(x, rep(1 / n, n), sides = 1)
    } else {
      new_n <- length(x)
      stats::filter(x, rep(1 / new_n, new_n), sides = 1)
    }
}


ggplot(data = pd %>% group_by(country) %>% 
         mutate(start_day = dplyr::first(date[deaths >=1])) %>% 
         filter(date >= start_day) %>% 
         mutate(days_since = as.numeric(date - start_day)) %>%
         ungroup %>% arrange(date) %>%
         group_by(country, ccaa) %>%
         mutate(rolling_average = ma(deaths_non_cum, n = 3)) %>%
         ungroup,
       aes(x = date,
           y = rolling_average,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  # geom_point(size = 3,
  #            aes(color = ccaa)) +
  # scale_y_log10(limits = c(1.5, 1000)) +
  # scale_y_log10() +
  facet_wrap(~country) +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  # geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Date',
       y = 'Deaths (log-scale)',
       title = 'Daily COVID-19 deaths by geography',
       subtitle = '3-day rolling average') +
  theme(legend.position = 'top',
        plot.title = element_text(size = 30),
        plot.subtitle = element_text(size = 24),
        strip.text = element_text(size = 30, hjust = 0.5),
        legend.text = element_text(size = 20))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5))

ggsave('~/Desktop/madlom.png')
roll_curve <- function(data,
                       n = 4,
                       deaths = FALSE,
                       scales = 'fixed',
                       nrow = NULL,
                       ncol = NULL,
                       pop = FALSE){

  # Create the n day rolling avg
  ma <- function(x, n = 5){
    
    if(length(x) >= n){
      stats::filter(x, rep(1 / n, n), sides = 1)
    } else {
      new_n <- length(x)
      stats::filter(x, rep(1 / new_n, new_n), sides = 1)
    }
    
    
  }
  
  pd <- data
  if(deaths){
    pd$var <- pd$deaths_non_cum
  } else {
    pd$var <- pd$cases_non_cum
  }
  
  if('ccaa' %in% names(pd)){
    pd$geo <- pd$ccaa
  } else {
    pd$geo <- pd$iso
  }
  
  if(pop){
    # try to get population
    if(any(pd$geo %in% unique(esp_df$ccaa))){
      right <- esp_pop
      right_var <- 'ccaa'
    } else {
      right <- world_pop
      right_var <- 'iso'
    }
    pd <- pd %>% left_join(right %>% dplyr::select(all_of(right_var), pop),
                           by = c('geo' = right_var)) %>%
      mutate(var = var / pop * 100000)
  }
  
  pd <- pd %>%
    arrange(date) %>%
    group_by(geo) %>%
    mutate(with_lag = ma(var, n = n))
  
  
  ggplot() +
    geom_bar(data = pd,
         aes(x = date,
             y = var),
             stat = 'identity',
         fill = 'grey',
         alpha = 0.8) +
    geom_area(data = pd,
              aes(x = date,
                  y = with_lag),
              color = 'red',
              fill = 'red',
              alpha = 0.6) +
    facet_wrap(~geo, scales = scales, nrow = nrow, ncol = ncol) +
    theme_simple() +
    labs(x = '',
         y = ifelse(deaths, 'Deaths', 'Cases'),
         title = paste0('Daily (non-cumulative) ', ifelse(deaths, 'deaths', 'cases'),
                        ifelse(pop, ' (per 100,000)', '')),
         subtitle = paste0('Data as of ', max(data$date),
                           '.\nRed line = ', n, ' day rolling average. Grey bar = day-specific value.')) +
    theme(axis.text.x = element_text(size = 7,
                                     angle = 90,
                                     hjust = 0.5, vjust = 1)) #+
    # scale_x_date(name ='',
    #              breaks = sort(unique(pd$date)),
    #              labels = format(sort(unique(pd$date)), '%b %d'))
    # scale_y_log10()
}
this_ccaa <- 'Cataluña'
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, scales = 'fixed')  + theme(strip.text = element_text(size = 30))

plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30))

african_countries <-  world_pop$country[world_pop$sub_region %in% c('Sub-Saharan Africa')]

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 1,
                    max_date = Sys.Date() - 46,
                    ylog = F) +
  ylim(0, 5000)
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 1,
                    max_date = Sys.Date(),
                    ylog = F) +
  ylim(0, 5000)  
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 1,
                    calendar = T,
                    max_date = Sys.Date(),
                    ylog = T) + theme(legend.position = 'none')
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 13)
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 6)
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 10,
                    max_date = Sys.Date(),
                    ylog = F)
pd

latam_countries <-  world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean')]
latam_countries <- latam_countries[!latam_countries %in% c('Guyana')]

pd <- plot_day_zero(countries = c(latam_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 13)
pd

pd <- plot_day_zero(countries = c(latam_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 6)
pd

pd <- plot_day_zero(countries = c(latam_countries),
                    day0 = 10,
                    max_date = Sys.Date())
pd

Latin America and Africa vs Europe

isos <- sort(unique(world_pop$sub_region))
keep_countries <- world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean', 'Sub-Saharan Africa') |
                                      world_pop$region %in% 'Europe']
keep_countries <- keep_countries[!keep_countries %in% c('Guyana')]
pd <- df_country %>% ungroup %>%
  filter(country %in% keep_countries) %>%
  dplyr::select(-country) %>%
  left_join(world_pop) %>%
  group_by(iso) %>%
  mutate(day0 = min(date[cases >= 10])) %>%
  ungroup %>%
  mutate(days_since = date - day0) %>%
  filter(days_since >= 0)


cols <- c( 'black')
g <- ggplot(data = pd,
       aes(x = days_since,
           y = cases,
           group = country,
           color = region)) +
  geom_line(data = pd %>% filter(region == 'Europe'),
            alpha = 0.6) +
  scale_y_log10() +
  scale_color_manual(name = '', values = cols) +
  theme_simple() +
  labs(x = 'Days since first day at 10 cases') +
  theme(legend.position = 'top')
g

cols <- c('darkred', 'black')

g + 
    geom_line(data = pd %>% filter(region == 'Africa'),
            # alpha = 1,
            size = 1.5,
            alpha = 0.8) +
    scale_y_log10() +
    scale_color_manual(name = '', values = cols) 

cols <- c( 'darkorange', 'black')
g + 
    geom_line(data = pd %>% filter(sub_region == 'Latin America and the Caribbean'),
            # alpha = 1,
            size = 1.5,
            alpha = 0.8) +
    scale_color_manual(name = '', values = cols) 

cols <- c('darkred', 'darkorange', 'black')
g + 
    geom_line(data = pd %>% filter(sub_region != 'Europe'),
            # alpha = 1,
            size = 1.5,
            alpha = 0.8) +
    scale_color_manual(name = '', values = cols) 

# Assets
pyramid_dir <- '../../data-raw/pyramids/'
pyramid_files <- dir(pyramid_dir)
out_list <- list()
for(i in 1:length(pyramid_files)){
  out_list[[i]] <- read_csv(paste0(pyramid_dir, pyramid_files[i])) %>%
    mutate(region = gsub('.csv', '', pyramid_files[i]))
}
pyramid <- bind_rows(out_list)
make_pyramid <- function(the_region = 'AFRICA-2019'){
  sub_data <- pyramid %>% filter(region == the_region)
  sub_data$Age <- factor(sub_data$Age, levels = sub_data$Age)
  sub_data <- tidyr::gather(sub_data, key, value, M:F)
  ggplot(data = sub_data,
         aes(x = Age,
             y = value,
             fill = key)) +
    geom_bar(stat = 'identity',
             position = position_dodge(),
             alpha = 0.6,
             color = 'black') +
    scale_fill_manual(name = '', values = c('darkorange', 'lightblue')) +
    theme_simple() +
    labs(x = 'Age group',
         y = 'Population') +
    theme(legend.position = 'top')
}
make_pyramid_overlay <- function(){
  sub_data <- pyramid %>% filter(region %in% c('EUROPE-2019',
                                               'AFRICA-2019',
                                               'LATIN AMERICA AND THE CARIBBEAN-2019')) %>%
    mutate(region = gsub('-2019', '', region))
   sub_data$Age <- factor(sub_data$Age, levels = unique(sub_data$Age))
  sub_data <- tidyr::gather(sub_data, key, value, M:F) %>%
    group_by(Age, region) %>%
    summarise(value = sum(value)) %>%
    ungroup %>%
    group_by(region) %>%
    mutate(p = value / sum(value) * 100) %>%
    ungroup
  ggplot(data = sub_data,
         aes(x = Age,
             y = p,
             color = region,
             group = region,
             fill = region)) +
    geom_area(position = position_dodge(),
              alpha = 0.6) +
    scale_fill_manual(name = '',
                      values = c('darkred', 'darkorange', 'black')) +
    scale_color_manual(name = '',
                      values = c('darkred', 'darkorange', 'black')) +
    theme_simple() +
    theme(legend.position = 'top') +
    labs(x = 'Age group', y = 'Percentage')
}

make_pyramid(the_region = 'Spain-2019') + labs(title = 'Spain')
make_pyramid(the_region = 'Italy-2019') + labs(title = 'Italy')

make_pyramid(the_region = 'EUROPE-2019') + labs(title = 'Europe')
make_pyramid(the_region = 'Kenya-2019') + labs(title = 'Kenya')


make_pyramid(the_region = 'Guatemala-2019') + labs(title = 'Guatemala')

make_pyramid(the_region = 'AFRICA-2019') + labs(title = 'Africa')

make_pyramid(the_region = 'LATIN AMERICA AND THE CARIBBEAN-2019') + labs(title = 'Latin America and the Caribbean')

make_pyramid_overlay() + labs(title = 'Population distribution by region')
plot_day_zero(countries = c('South Africa', 'Spain'), day0 = 1, calendar = T)

plot_day_zero(countries = c('Kenya', 'Italy'), day0 = 10, calendar = F)

Pics

plot_day_zero(countries = c('China', 'Italy', 'Spain'),
              districts = c('Hubei', 'Lombardia', 'Cataluña', 'Madrid'),
              by_district = T,
              point_alpha = 0,
              day0 = 5,
              pop = F,
              deaths = T,
              ylog = T,
              calendar = F,
              roll = 5)

Map of portugal, france, spain

# cat_transform <- function(x){ifelse(x == 'Catalunya', 'Cataluña', x)}
cat_transform <- function(x){return(x)}
pd <- por_df %>% mutate(country = 'Portugal') %>%
  bind_rows(esp_df %>% mutate(country = 'Spain')) %>%
  bind_rows(fra_df %>% mutate(country = 'France')) %>%
  bind_rows(ita %>% mutate(country = 'Italy')) %>%
  bind_rows(
    df %>% filter(country == 'Andorra') %>%
      mutate(ccaa = 'Andorra')
  )
keep_date = pd %>% group_by(country) %>% summarise(max_date= max(date)) %>% summarise(x = min(max_date)) %>% .$x
pd <- pd %>%
  mutate(ccaa = cat_transform(ccaa)) %>%
  group_by(ccaa) %>%
  filter(date == keep_date) %>%
  # filter(date == '2020-03-27') %>%
  ungroup %>%
  dplyr::select(date, ccaa, deaths, deaths_non_cum,
                cases, cases_non_cum) %>%
  left_join(regions_pop %>%
              bind_rows(
                world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>%
                  dplyr::select(-region, -sub_region)
              )) %>%
  mutate(cases_per_million = cases / pop * 1000000,
         deaths_per_million = deaths / pop * 1000000) %>%
  mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000,
         deaths_per_million_non_cum = deaths_non_cum / pop * 1000000)

map_esp1 <- map_esp
map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa)
map_fra1 <- map_fra
map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1)
map_por1 <- map_por
map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR)
map_ita1 <- map_ita 
map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa)
map_and1 <- map_and
map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0)


map <- 
  rbind(map_esp1,
        map_por1,
        map_fra1,
        map_ita1,
        map_and1)

# Remove areas not of interest
centroids <- coordinates(map)
centroids <- data.frame(centroids)
names(centroids) <- c('x', 'y')
centroids$ccaa <- map@data$ccaa
centroids <- left_join(centroids, pd)
# map <- map_sp <- map[centroids$y >35 & centroids$x > -10 &
#              centroids$x < 8 & (centroids$y < 43  | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') |
#                                   map@data$ccaa %in% esp_df$ccaa),]
# map_sp <- map <-  map[centroids$x > -10 & centroids$y <47,]
map_sp <- map <-  map[centroids$x > -10 & centroids$y <77,]

# map_sp <- map <-  map[centroids$x > -10,]

# fortify
map <- fortify(map, region = 'ccaa')

# join data
map$ccaa <- map$id
map <- left_join(map, pd)
var <- 'deaths_per_million'
map$var <- as.numeric(unlist(map[,var]))
centroids <- centroids[,c('ccaa', 'x', 'y', var)]
centroids <- centroids %>%
  filter(ccaa %in% map_sp@data$ccaa)

# cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))
# cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1')
# cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e')
# cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'lightblue'))(10))
g <- ggplot(data = map,
         aes(x = long,
             y = lat,
             group = group)) +
    geom_polygon(aes(fill = var),
                 lwd = 0.3,
                 # color = 'darkgrey',
                 color = NA,
                 size = 0.6) +
      scale_fill_gradientn(name = '',
                           colours = cols) +
  # scale_fill_() +
  ggthemes::theme_map() +
  theme(legend.position = 'bottom',
        plot.title = element_text(size = 16)) +
  guides(fill = guide_colorbar(direction= 'horizontal',
                               barwidth = 50,
                               barheight = 1,
                               label.position = 'bottom')) +
  labs(title = 'Cumulative COVID-19 deaths per million population, western Mediterranean',
       subtitle = paste0('Data as of ', format(max(pd$date), '%B %d, %Y'))) +
  geom_text(data = centroids,
            aes(x = x,
                y = y,
                group = NA,
                label = paste0(ccaa, '\n',
                               round(deaths_per_million, digits = 2))),
            alpha = 0.8,
            size = 3)
g

ggsave('/tmp/map_with_borders.png',
       height = 8, width = 13)

Animation, Portugal, France, Spain, Italy

dir.create('/tmp/animation_map/')
pd <- por_df %>% mutate(country = 'Portugal') %>%
  bind_rows(esp_df %>% mutate(country = 'Spain')) %>%
  bind_rows(fra_df %>% mutate(country = 'France')) %>%
  bind_rows(ita %>% mutate(country = 'Italy')) %>%
  bind_rows(
    df %>% filter(country == 'Andorra') %>%
      mutate(ccaa = 'Andorra')
  )
pd %>% group_by(country) %>% summarise(max_date= max(date))
# A tibble: 5 x 2
  country  max_date  
  <chr>    <date>    
1 Andorra  2020-04-27
2 France   2020-04-27
3 Italy    2020-04-27
4 Portugal 2020-04-28
5 Spain    NA        
unique_dates <- sort(unique(pd$date))
unique_dates <- unique_dates[unique_dates >= '2020-03-01']
popper <- regions_pop %>%
                bind_rows(
                  world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>%
                    dplyr::select(-region, -sub_region)
                )


map_esp1 <- map_esp
map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa)
map_fra1 <- map_fra
map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1)
map_por1 <- map_por
map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR)
map_ita1 <- map_ita 
map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa)
map_and1 <- map_and
map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0)


map <- 
  rbind(map_esp1,
        map_por1,
        map_fra1,
        map_ita1,
        map_and1)

# Remove areas not of interest
centroids <- coordinates(map)
centroids <- data.frame(centroids)
names(centroids) <- c('x', 'y')
centroids$ccaa <- map@data$ccaa
# map <- map_sp <- map[centroids$y >35 & centroids$x > -10 &
#              # centroids$x < 8 &
#                (centroids$y < 43  | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') |
#                                   map@data$ccaa %in% esp_df$ccaa),]
# map_sp <- map <-  map[centroids$x > -10 & centroids$y <47,]
map_sp <- map <-  map[centroids$x > -10 & centroids$y <477,]


# fortify
map <- fortify(map, region = 'ccaa')



for(i in 1:length(unique_dates)){
  this_date <- unique_dates[i]
    today_map <- map
    today_centroids <- centroids
    today_pd <- pd

  today_pd <- today_pd %>%
      mutate(ccaa = cat_transform(ccaa)) %>%
    group_by(ccaa) %>%
    # filter(date == max(date)) %>%
    filter(date == this_date) %>%
    ungroup %>%
    dplyr::select(date, ccaa, deaths, deaths_non_cum,
                  cases, cases_non_cum) %>%
    left_join(popper) %>%
    mutate(cases_per_million = cases / pop * 1000000,
           deaths_per_million = deaths / pop * 1000000) %>%
    mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000,
           deaths_per_million_non_cum = deaths_non_cum / pop * 1000000)
  
  today_centroids <- left_join(today_centroids, today_pd)

  
  # join data
  today_map$ccaa <- today_map$id
  today_map <- left_join(today_map, today_pd)
  var <- 'deaths_per_million'
  today_map$var <- as.numeric(unlist(today_map[,var]))
  today_map$var <- ifelse(is.na(today_map$var), 0, today_map$var)
  today_centroids <- today_centroids[,c('ccaa', 'x', 'y', var)]
  today_centroids <- today_centroids %>%
    filter(ccaa %in% today_map$ccaa)
  today_centroids$var <- today_centroids[,var]
  today_centroids$var <- ifelse(is.na(today_centroids$var), 0, today_centroids$var)

  
  # cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))
  # cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1')
  # cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e')
  # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
  cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'white'))(17))
  g <- ggplot(data = today_map,
           aes(x = long,
               y = lat,
               group = group)) +
      geom_polygon(aes(fill = var),
                   lwd = 0.3,
                   # color = 'darkgrey',
                   color = NA,
                   size = 0.6) +
        scale_fill_gradientn(name = '',
                             colours = cols,
                             breaks = seq(0, 1100, 50),
                             limits = c(0, 1100)) +
    # scale_fill_() +
    ggthemes::theme_map() +
    theme(legend.position = 'right',
          plot.title = element_text(size = 24)) +
    guides(fill = guide_colorbar(direction= 'vertical',
                                 barwidth = 1,
                                 barheight = 30,
                                 label.position = 'left')) +
    labs(subtitle = 'Cumulative COVID-19 deaths per million population',
         title = paste0(format(this_date, '%B %d, %Y'))) +
    geom_text(data = today_centroids,
              aes(x = x,
                  y = y,
                  group = NA,
                  label = paste0(ccaa, '\n',
                                 round(var, digits = 2))),
              alpha = 0.8,
              size = 1.5)
  
  ggsave(paste0('/tmp/animation_map/', this_date, '.png'),
         plot = g,
         height = 6, width = 9)
}
# Command line
cd /tmp/animation_map
mogrify -resize 50% *.png
convert -delay 20 -loop 0 *.png result.gif

Deaths overall over time Spain

df_country %>% filter(country == 'Spain') %>% arrange(date) %>% tail
# A tibble: 6 x 10
# Groups:   country [1]
  country date        cases deaths   uci hospitalizations cases_non_cum
  <chr>   <date>      <dbl>  <dbl> <dbl>            <int>         <dbl>
1 Spain   2020-04-23 219735  22524  7780                0          4173
2 Spain   2020-04-24 223759  22902  7756                0          4024
3 Spain   2020-04-25 226629  23190  7764                0          2870
4 Spain   2020-04-26 229422  23521 10372                0          2793
5 Spain   2020-04-27 232128  23822 10437                0          2706
6 Spain   NA              0      0     0                0             0
# … with 3 more variables: deaths_non_cum <dbl>, uci_non_cum <dbl>, iso <chr>

Deaths yesterday

pd <- df_country
pd$value <- pd$deaths_non_cum
the_date <- Sys.Date() - 1
pd <- pd %>%
  filter(date == the_date) %>%
  dplyr::select(country, iso, cases, cases_non_cum,
                deaths, value) %>%
  dplyr::arrange(desc(value)) %>%
  left_join(world_pop %>% dplyr::select(-country)) %>%
  mutate(value_per_million = value / pop * 1000000) #%>% 
  # arrange(desc(value_per_million))
pd <- pd[1:10,]
pd$country <- factor(pd$country, levels = pd$country)
ggplot(data = pd,
       aes(x = country,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  geom_text(aes(label = value),
            nudge_y = -20,
            size = 4,
            color = 'white') +
  labs(title = paste0('Confirmed COVID-19 deaths on ', the_date),
       x = '', y = '')

pd
# A tibble: 10 x 10
# Groups:   country [10]
   country iso    cases cases_non_cum deaths value    pop region sub_region
   <fct>   <chr>  <dbl>         <dbl>  <dbl> <dbl>  <dbl> <chr>  <chr>     
 1 US      USA   988197         22414  56259  1378 3.27e8 Ameri… Northern …
 2 France  FRA   165963          3743  23327   437 6.70e7 Europe Western E…
 3 United… GBR   158348          4311  21157   363 6.65e7 Europe Northern …
 4 Italy   ITA   199414          1739  26977   333 6.04e7 Europe Southern …
 5 Brazil  BRA    67446          4346   4603   317 2.09e8 Ameri… Latin Ame…
 6 Spain   ESP   232128          2706  23822   301 4.67e7 Europe Southern …
 7 Canada  CAN    49616          1629   2841   162 3.71e7 Ameri… Northern …
 8 Germany DEU   158758           988   6126   150 8.29e7 Europe Western E…
 9 Belgium BEL    46687           553   7207   113 1.14e7 Europe Western E…
10 Iran    IRN    91472           991   5806    96 8.18e7 Asia   Southern …
# … with 1 more variable: value_per_million <dbl>

Deaths per million yesterday per CCAA

pd <- esp_df
pd$value <- pd$deaths_non_cum
the_date <- max(pd$date)
pd <- pd %>%
  filter(date == max(date)) %>%
  dplyr::select(ccaa, cases, cases_non_cum,
                deaths, value) %>%
  dplyr::arrange(desc(value)) %>%
  left_join(esp_pop)%>%
  mutate(value_per_million = value / pop * 1000000) #%>% 
  # arrange(desc(value_per_million))
pd <- pd[1:10,]
pd$ccaa <- factor(pd$ccaa, levels = pd$ccaa)
ggplot(data = pd,
       aes(x = ccaa,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  geom_text(aes(label = value),
            nudge_y = -20,
            size = 4,
            color = 'white')

pd
# A tibble: 10 x 7
   ccaa  cases cases_non_cum deaths value   pop value_per_million
   <fct> <dbl>         <dbl>  <dbl> <dbl> <dbl>             <dbl>
 1 <NA>     NA            NA     NA    NA    NA                NA
 2 <NA>     NA            NA     NA    NA    NA                NA
 3 <NA>     NA            NA     NA    NA    NA                NA
 4 <NA>     NA            NA     NA    NA    NA                NA
 5 <NA>     NA            NA     NA    NA    NA                NA
 6 <NA>     NA            NA     NA    NA    NA                NA
 7 <NA>     NA            NA     NA    NA    NA                NA
 8 <NA>     NA            NA     NA    NA    NA                NA
 9 <NA>     NA            NA     NA    NA    NA                NA
10 <NA>     NA            NA     NA    NA    NA                NA

Deaths yesterday animation

dir.create('/tmp/animation_deaths')
dates <- seq(as.Date('2020-03-17'), (Sys.Date()-1), by = 1)
for(i in 1:length(dates)){
  this_date <- dates[i]
  pd <- df_country
  pd$value <- pd$deaths_non_cum
  pd <- pd %>%
    filter(date == max(this_date)) %>%
    dplyr::select(country, cases, cases_non_cum,
                  deaths, value) %>%
    dplyr::arrange(desc(value))
  pd <- pd[1:10,]
  pd <- pd %>% filter(value > 0)
  pd$country <- gsub(' ', '\n', pd$country)
  pd$country <- factor(pd$country, levels = pd$country)
  pd$color_var <- pd$country == 'Spain'
  if('Spain' %in% pd$country){
    cols <- rev(c('darkred', 'black'))
  } else {
    cols <- 'black'
  }
  g <- ggplot(data = pd,
         aes(x = country,
             y = value)) +
    geom_bar(stat = 'identity',
             aes(fill = color_var),
             alpha = 0.8,
             show.legend = FALSE) +
    theme_simple() +
    geom_text(aes(label = value),
              nudge_y = max(pd$value) * .05,
              size = 5,
              color = 'black') +
    labs(title = 'Daily (non-cumulative) COVID-19 deaths',
         subtitle = format(this_date, '%B %d')) +
    labs(x = 'Country',
         y = 'Deaths') +
    theme(axis.text = element_text(size = 15),
          plot.subtitle = element_text(size = 20)) +
    scale_fill_manual(name ='',
                      values = cols) +
    ylim(0, 900)
  ggsave(filename = paste0('/tmp/animation_deaths/', this_date, '.png'),
         g)
}
# Command line
cd /tmp/animation_deaths
mogrify -resize 50% *.png
convert -delay 50 -loop 0 *.png result.gif

Heatmap

pd <- by_country <-  esp_df %>% mutate(country = 'Spain') %>% 
  bind_rows(ita %>% mutate(country = 'Italy')) %>%
  bind_rows(por_df %>% mutate(country = 'Portugal')) %>%
  bind_rows(fra_df %>% mutate(country = 'France'))
pd$value <- pd$deaths_non_cum
max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d
# pd$value <- ifelse(is.na(pd$value), 0, pd$value)
left <- expand.grid(date = seq(min(pd$date),
                               max(pd$date),
                               by = 1),
                    ccaa = sort(unique(pd$ccaa)))
Error in seq.int(0, to0 - from, by): 'to' must be a finite number
right <- pd %>% dplyr::select(date, ccaa, value)
pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value))
Error in left_join(left, right): object 'left' not found
pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>%
  filter(date <= max_date) %>%
  filter(value > 0)
the_limits <- c(1, 600)
the_breaks <- c(1, seq(100, 600, length = 6)) #seq(0, 600, length = 7)
pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa))))
ggplot(data = pd,
       aes(x = date,
           y = ccaa,
           color = value,
           size = value)) +
  # geom_tile(color = 'white') +
  geom_point(alpha = 0.8) +
  # geom_tile() +
  scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
                        name = '',
                        limits = the_limits,
                        breaks = the_breaks) +
    # scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
    #                     name = '',
    #                     limits = the_limits,
    #                     breaks = the_breaks) +
  scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) +
  
  theme_simple() +
  facet_wrap(~country, scales = 'free_y') +
  theme(strip.text = element_text(size = 20),
        axis.title = element_blank(),
        axis.text = element_text(size = 10),
        axis.text.x = element_text(size = 12)) +
  guides(color = guide_legend(), size = guide_legend()) +
  labs(title = 'Daily (non-cumulative) COVID-19 deaths by sub-state regions',
       caption = paste0('Data as of ', max_date))
Error: Faceting variables must have at least one value

ggsave('/tmp/1.png',
       width = 10,
       height = 8)
Error: Faceting variables must have at least one value

Heatmap per population

pd <- by_country <-  esp_df %>% mutate(country = 'Spain') %>%  bind_rows(ita %>% mutate(country = 'Italy'))
poppy <- bind_rows(ita_pop, esp_pop)
pd <- pd %>% left_join(poppy)
pd$value <- pd$deaths_non_cum / pd$pop * 1000000
max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d
# pd$value <- ifelse(is.na(pd$value), 0, pd$value)
left <- expand.grid(date = seq(min(pd$date),
                               max(pd$date),
                               by = 1),
                    ccaa = sort(unique(pd$ccaa)))
Error in seq.int(0, to0 - from, by): 'to' must be a finite number
right <- pd %>% dplyr::select(date, ccaa, value)
pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value))
Error in left_join(left, right): object 'left' not found
pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>%
  filter(date <= max_date) %>%
  filter(value > 0)
the_limits <- c(1, 60)
the_breaks <- c(1, seq(10, 60, length = 6)) #seq(0, 600, length = 7)
pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa))))
ggplot(data = pd,
       aes(x = date,
           y = ccaa,
           color = value,
           size = value)) +
  # geom_tile(color = 'white') +
  geom_point(alpha = 0.8) +
  scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
                        name = '',
                        limits = the_limits,
                        breaks = the_breaks) +
  scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) +
  theme_simple() +
  facet_wrap(~country, scales = 'free') +
  theme(strip.text = element_text(size = 26),
        axis.title = element_blank(),
        axis.text = element_text(size = 16),
        axis.text.x = element_text(size = 12)) +
  guides(color = guide_legend(), size = guide_legend()) +
  labs(title = 'Daily COVID-19 deaths per 1,000,000 population by sub-state regions',
       caption = paste0('Data as of ', max_date))
Error: Faceting variables must have at least one value

ggsave('/tmp/2.png',
       width = 10,
       height = 8)
Error: Faceting variables must have at least one value

Madrid vs rest of state

place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Otras CCAA')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Otras regiones italianas')
  # )
}
pd <- esp_df %>% mutate(country = 'España') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Italia')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = c('Madrid',
                             # 'Cataluña',
                             'Otras CCAA',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Otras regiones italianas'))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)
ggplot(data = pd,
       aes(x = date,
           y = deaths_non_cum_p,
           fill = ccaa,
           group = ccaa)) +
  geom_bar(stat = 'identity',
           position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  # geom_line(size = 0.2,
  #           aes(color = ccaa)) +
  xlim(as.Date('2020-03-09'),
       Sys.Date()-1) +
  theme_simple() +
  labs(x = 'Fecha',
       y = 'Muertes diarias por 1.000.000',
       title = 'Muertes por 1.000.000 habitantes') +
  theme(legend.position = 'top') +
  geom_text(aes(label = round(deaths_non_cum_p, digits = 1),
                color = ccaa,
                y = deaths_non_cum_p + 2,
                group = ccaa),
            size = 2.5,
            angle = 90,
            position = position_dodge(width = 0.99))

label_data <- pd %>%
  filter(country  %in% c('Italia', 'España')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -10))
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day),
       aes(x = date,
           y = p_deaths_country,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  geom_point(size = 3,
             aes(color = ccaa)) +
  facet_wrap(~country, scales = 'free_x') +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Fecha',
       y = 'Porcentaje de muertes',
       title = 'Porcentaje de muertes del país: región más afectada vs. resto del país',
       subtitle = 'A partir del primer día en cada país con 50 o más muertes') +
  theme(legend.position = 'top',
        strip.text = element_text(size = 30),
        legend.text = element_text(size = 10))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5)) +
  geom_text(data = label_data,
            aes(x = x - 2,
                y = y,
                label = label,
                color = ccaa),
            size = 6,
            show.legend = FALSE)

Italy regions, Spanish regions, Chinese regions (adjusted for population)

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
  mutate(cases = cases) %>%
  mutate(ccaa = district) %>%
  mutate(country = 'China') %>%
  left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'Hubei (China)',
                          ifelse(country == 'Italy', 'Italia', 'España')))

lombardy_location <- (max(pd_big$days_since_start_deaths_pm[pd_big$ccaa == 'Lombardia']))
Error in eval(expr, envir, enclos): object 'pd_big' not found
# Define label data
label_data <- pd %>% group_by(ccaa) %>% filter(
                                                          (
                                                            (country == 'Hubei (China)' & days_since_start_deaths_pm == 22) |
                                                            (date == max(date) & country == 'España' & deaths_pm > 40 & days_since_start_deaths_pm >= 7) & ccaa != 'CyL' |
                                                              (date == max(date) & country == 'Italia' &
                                                                 ccaa != 'Liguria' & days_since_start_deaths_pm > 15)
                                                          ))
# Get differential label data based on what to be emphasized
bigs <- c('Madrid', 'Lombardia', 'Hubei')
label_data_big <- label_data %>%
  filter(ccaa %in% bigs)
label_data_small <- label_data %>%
  filter(!ccaa %in% bigs)

pd_big <- pd %>%
  filter(ccaa %in% bigs)
pd_small <- pd %>%
  filter(!ccaa %in% bigs)

# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c('darkred', '#FF6633', '#006666')

ggplot(data = pd_big,
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small,
            aes(x = days_since_start_deaths_pm,
                y = deaths_pm,
                color = country),
            alpha = 0.7,
            size = 1) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c(cols)) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde "el comienzo del brote"',
       y = 'Muertes por millón de habitantes',
       title = 'Muertes por 1.000.000 habitantes',
       subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población\nLíneas rojas: CCAA; líneas verde-azules: regiones italianas; línea naranja: Hubei, China'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data_big,
            aes(x = days_since_start_deaths_pm + 0.6,
                y = (deaths_pm + 50),
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths_pm + 0.6,
                y = deaths_pm  + (log(deaths_pm)/2),
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 16),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 25))  +
  xlim(0, lombardy_location + 5)
Error in limits(c(...), "x"): object 'lombardy_location' not found

Italy regions, Spanish regions, Chinese regions (raw numbers, not adjusted for population)

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
  mutate(cases = cases) %>%
  mutate(ccaa = district) %>%
  mutate(country = 'China') %>%
  left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'China',
                          ifelse(country == 'Italy', 'Italia', 'España')))

# Define label data
label_data <- pd %>% group_by(ccaa) %>% filter(
                                                          (
                                                            (country == 'China' & deaths >10 & days_since_start_deaths == 29) |
                                                            (date == max(date) & country == 'España' & deaths > 90) |
                                                              (date == max(date) & country == 'Italia' &
                                                                 ccaa != 'Liguria' & days_since_start_deaths > 10)
                                                          ))
# Get differential label data based on what to be emphasized
label_data_big <- label_data %>%
  filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
label_data_small <- label_data %>%
  filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))

pd_big <- pd %>%
  filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
pd_small <- pd %>%
  filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))

# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c( '#FF6633',  'darkred', '#006666')
ggplot(data = pd_big,
       aes(x = days_since_start_deaths,
           y = deaths,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small,
            aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
            alpha = 0.7,
            size = 1) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c(cols)) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
       y = 'Muertes',
       title = 'Muertes por COVID-19',
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data_big,
            aes(x = days_since_start_deaths + 1.6,
                y = ifelse(ccaa == 'Hubei', (deaths -500),
                           ifelse(ccaa == 'Lombardia',  (deaths + 700),
                                   (deaths + 300))),
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths + 1.6,
                align = 'left',
                y = deaths ,
                label = ccaa,
                # label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 30))  +
  xlim(0, 50)

ANIMATION: Italy regions, Spanish regions, Chinese regions (raw numbers, not adjusted for population)

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
  mutate(cases = cases) %>%
  mutate(ccaa = district) %>%
  mutate(country = 'China') %>%
  left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'China',
                          ifelse(country == 'Italy', 'Italia', 'España')))


add_zero <- function(x, n){
  x <- as.character(x)
  adders <- n - nchar(x)
  adders <- ifelse(adders < 0, 0, adders)
  for (i in 1:length(x)){
    if(!is.na(x[i])){
      x[i] <- paste0(
        paste0(rep('0', adders[i]), collapse = ''),
        x[i],
        collapse = '')  
    } 
  }
  return(x)
}
# # Define label data
# label_data <- pd %>% group_by(ccaa) %>% filter(
#                                                           (
#                                                             (country == 'China' & deaths >10 & days_since_start_deaths == 29) |
#                                                             (date == max(date) & country == 'España' & deaths > 90) |
#                                                               (date == max(date) & country == 'Italia' &
#                                                                  ccaa != 'Liguria' & days_since_start_deaths > 10)
#                                                           ))
# # Get differential label data based on what to be emphasized
# label_data_big <- label_data %>%
#   filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
# label_data_small <- label_data %>%
#   filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
# 
pd_big <- pd %>%
  filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
pd_small <- pd %>%
  filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))



# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c( '#FF6633',  'darkred', '#006666')

the_dir <- '/tmp/animation/'
dir.create(the_dir)
the_dates <- sort(unique(c(pd_big$date, pd_small$date)))
for(i in 1:length(the_dates)){
  
  the_date <- the_dates[i]
  pd_big_sub <- pd_big %>% filter(date <= the_date)
  pd_big_current <- pd_big_sub %>% filter(date == the_date)
  pd_small_sub <- pd_small %>% filter(date <= the_date)
  pd_small_current <- pd_small_sub %>% filter(date == the_date)

  label_data_big <-
    pd_big_sub %>%
    filter(ccaa %in% c('Lombardia', 'Madrid', 'Hubei')) %>%
    group_by(ccaa) %>%
    filter(date == max(date)) %>%
    ungroup %>%
    mutate(days_since_start_deaths = ifelse(ccaa == 'Hubei' &
                                              days_since_start_deaths >32,
                                            32,
                                            days_since_start_deaths))
  
  label_data_small <-
    pd_small_sub %>%
    filter(ccaa %in% c('Emilia Romagna',
                       'Cataluña',
                       'CLM',
                       'País Vasco',
                       'Veneto',
                       'Piemonte',
                       'Henan',
                       'Heilongjiang')) %>%
    group_by(ccaa) %>%
    filter(date == max(date))

  n_countries <- length(unique(pd_big_sub$country))
  if(n_countries == 3){
    sub_cols  <- cols
  }
  if(n_countries == 2){
    sub_cols <- cols[c(1,3)]
  }
   if(n_countries == 1){
    sub_cols <- cols[1]
  }
  g <- ggplot(data = pd_big_sub,
       aes(x = days_since_start_deaths,
           y = deaths,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small_sub,
            aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
            alpha = 0.7,
            size = 1) +
    geom_point(data = pd_big_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
               size = 3) +
    geom_point(data = pd_small_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
               size = 1, alpha = 0.6) +
    scale_y_log10(limits = c(5, 4500)) +
  scale_color_manual(name = '',
                     values = sub_cols) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
       y = 'Muertes',
       title = format(the_date, '%d %b'),
       subtitle = 'Muertes por COVID-19',
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data_big,
            aes(x = days_since_start_deaths + 1,
                y = deaths,
                hjust = 0,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths + 1.6,
                y = deaths ,
                label = ccaa,
                # label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 35),
        plot.subtitle = element_text(size = 24))  +
  xlim(0, 38) 
  message(i)
  ggsave(paste0(the_dir, add_zero(i, 3), '.png'),
         height = 7,
         width = 10.5)
}
# Command line
cd /tmp/animation
mogrify -resize 50% *.png
convert -delay 20 -loop 0 *.png result.gif

ANIMATION: Spain only

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
joined <- a
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'China',
                          ifelse(country == 'Italy', 'Italia', 'España')))

bigs <- c('Madrid', 'Cataluña', 'CLM', 'CyL', 'País Vasco', 'La Rioja')
pd_big <- pd %>%
  filter(ccaa %in% bigs)
pd_small <- pd %>%
  filter(!ccaa %in% bigs)



# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$country)))

the_dir <- '/tmp/animation2/'
dir.create(the_dir)
the_dates <- sort(unique(c(pd_big$date, pd_small$date)))
for(i in 1:length(the_dates)){
  
  the_date <- the_dates[i]
  pd_big_sub <- pd_big %>% filter(date <= the_date)
  pd_big_current <- pd_big_sub %>% filter(date == the_date)
  pd_small_sub <- pd_small %>% filter(date <= the_date)
  pd_small_current <- pd_small_sub %>% filter(date == the_date)

  label_data_big <-
    pd_big_sub %>%
    filter(ccaa %in% bigs) %>%
    group_by(ccaa) %>%
    filter(date == max(date)) %>%
    ungroup
  
  label_data_small <-
    pd_small_sub %>%
    group_by(ccaa) %>%
    filter(date == max(date))
# sub_cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$ccaa)))
  sub_cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Dark2'))(length(unique(pd$ccaa)))
  # sub_cols <- rainbow((length(unique(pd$ccaa))))
  
  g <- ggplot(data = pd_big_sub,
       aes(x = days_since_start_deaths,
           y = deaths,
           group = ccaa)) +
  geom_line(aes(color = ccaa),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small_sub,
            aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
            alpha = 0.7,
            size = 1) +
    geom_point(data = pd_big_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
               size = 3) +
    geom_point(data = pd_small_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
               size = 1, alpha = 0.6) +
    geom_point(data = pd,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
               size = 1, alpha = 0.01) +
    scale_y_log10(limits = c(5, max(pd$deaths)*1.2),
                  breaks = c(10, 50, 100, 500, 1000)) +
  scale_color_manual(name = '',
                     values = sub_cols) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
       y = 'Muertes',
       title = format(the_date, '%d %b'),
       subtitle = 'Muertes por COVID-19',
       caption = '@joethebrew | www.databrew.cc')   +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 35),
        plot.subtitle = element_text(size = 24))  +
  xlim(0, 20) +
    theme(legend.position = 'none')
  message(i)
  if(nrow(label_data_big) > 0){
    g <- g +
      geom_text(data = label_data_big,
            aes(x = days_since_start_deaths + 0.2,
                y = deaths,
                hjust = 0,
                label = gsub(' ', ' ', ccaa),
                color = ccaa),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths + 0.2,
                y = deaths ,
                label = ccaa,
                # label = gsub(' ', '\n', ccaa),
                color = ccaa),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE)
  }
  
  ggsave(paste0(the_dir, add_zero(i, 3), '.png'),
         height = 7,
         width = 12)
}
# Command line
cd /tmp/animation
mogrify -resize 50% *.png
convert -delay 25 -loop 0 *.png result.gif

Italy regions for Spanish regions

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# join
joined <- bind_rows(a, b)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

ggplot(data = joined %>% filter(days_since_start_deaths_pm >= 0),
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.8,
            size = 2) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c('darkorange', 'purple')) +
  theme_simple() +
  theme(legend.position = 'none') +
  labs(x = 'Days since "start out outbreak"',
       y = 'Deaths per million',
       title = 'Deaths per capita, Italian regions vs. Spanish autonomous communities',
       subtitle = paste0('Day 0 ("start of outbreak") = first day at ', x_deaths_pm, ' or greater cumulative deaths per million'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = joined %>% group_by(ccaa) %>% filter(date == max(date) & 
                                                          (
                                                            (country == 'Spain' & deaths_pm > 25) |
                                                              (country == 'Italy' & days_since_start_deaths_pm > 10)
                                                          )),
            aes(x = days_since_start_deaths_pm + 0.6,
                y = deaths_pm,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 6) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20)) +
  xlim(0, 23)

ggsave('/tmp/italy_comparison.png',
       height = 6,
       width = 10)


# Separate for Catalonia
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
         mutate(country = ifelse(ccaa == 'Cataluña',
                                 'Catalonia',
                                 country)) %>%
  mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa))
pdcat <- pd %>% filter(country == 'Catalonia')
label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) & 
                                                          (
                                                            (country == 'Catalonia') |
                                                            (country == 'Spain' & deaths_pm > 25) |
                                                              (country == 'Italy' & days_since_start_deaths_pm > 10)
                                                          ))
ggplot(data = pd,
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.3,
            size = 1.5) +
    geom_line(data = pdcat,
              aes(color = country),
            alpha = 0.8,
            size = 2) +
      geom_point(data = pdcat %>% filter(date == max(date)),
              aes(color = country),
            alpha = 0.8,
            size = 4) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c('darkred', 'darkorange', "purple")) +
  theme_simple() +
  theme(legend.position = 'none') +
  labs(x = 'Dies des del "començament del brot"',
       y = 'Morts per milió',
       title = 'Morts per càpita: Catalunya, comunitats autònomes, regions italianes',
       subtitle = paste0('Dia 0 ("començament del brot") = primer dia a ', x_deaths_pm, ' o més morts acumulades per milió de població'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data,
            aes(x = days_since_start_deaths_pm +0.2 ,
                y = deaths_pm +3,
                hjust = 0,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 6,
            alpha = 0.7) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20)) +
  xlim(0, 24)

ggsave('/tmp/cat_italy_comparison.png',
       height = 6,
       width = 10)


# Straightforward Lombardy, Madrid, Cat comparison
specials <- c('Lombardia', 'Madrid')
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
         mutate(country = ifelse(ccaa == 'Cataluña',
                                 'Catalonia',
                                 country)) %>%
  mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa))
pdcat <- pd %>% filter(ccaa %in%  specials)
label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) & 
                                                          (
                                                            # (country == 'Catalonia') |
                                                            (country == 'Spain' & deaths_pm > 20) |
                                                              (country == 'Italy' & days_since_start_deaths_pm >= 10)
                                                          ))
ggplot(data = pd,
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.3,
            size = 1.5) +
    geom_line(data = pdcat,
              aes(color = country),
            alpha = 0.8,
            size = 2) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c('darkred', 'darkorange', "purple")) +
  theme_simple() +
  theme(legend.position = 'none') +
  labs(x = 'Dias desde "el comienzo del brote"',
       y = 'Muertes por millón de habitantes',
       title = 'Muertes acumuladas por 1.000.000 habitantes',
       subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data %>% filter(!ccaa %in% specials),
            aes(x = days_since_start_deaths_pm + 0.4,
                y = deaths_pm +3,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.5) +
    geom_text(data = label_data %>% filter(ccaa %in% specials),
            aes(x = days_since_start_deaths_pm ,
                y = deaths_pm +30,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.8) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20)) +
  xlim(0, 23)

ggsave('/tmp/mad_lom_italy_comparison.png',
       height = 6,
       width = 10)

Loop for regions of the world

isos <- sort(unique(world_pop$sub_region))
isos <- c('Central Asia', 'Eastern Asia', 'Eastern Europe',
          'Latin America and the Caribbean',
          'Northern Africa', 'Northern America',
          'Nothern Europe',
          'South-eastern Asia',
          'Southern Asia', 'Southern Europe',
          'Sub-Saharan Africa', 'Western Asia', 'Western Europe')
dir.create('/tmp/world')
for(i in 1:length(isos)){
  this_iso <- isos[i]
  message(i, ' ', this_iso)
  countries <- world_pop %>% filter(sub_region == this_iso)
  pd <- df %>%
    filter(!country %in% c('Guyan
                           a', 'Bahamas', 'The Bahamas')) %>%
          group_by(country, iso, date) %>%
          summarise(cases = sum(cases, na.rm = TRUE)) %>%
    ungroup %>%
    group_by(country) %>%
         filter(length(which(cases > 0)) > 1) %>%
    ungroup %>%
         filter(iso %in% countries$iso)
  if(nrow(pd) > 0){
    cols <- colorRampPalette(brewer.pal(n = 8, 'Spectral'))(length(unique(pd$country)))
cols <- sample(cols, length(cols))
    # Plot
n_countries <- (length(unique(pd$country)))
ggplot(data = pd,
       aes(x = date,
           # color = country,
           # fill = country,
           y = cases)) +
  theme_simple() +
  # geom_point() +
  # geom_line() +
  geom_area(fill = 'darkred', alpha = 0.3, color = 'darkred') +
  # scale_color_manual(name = '',
  #                    values = cols) +
  # scale_fill_manual(name = '',
  #                   values = cols) +
  theme(legend.position = 'none',
        axis.text = element_text(size = 6),
        strip.text = element_text(size = ifelse(n_countries > 20, 6,
                                                ifelse(n_countries > 10, 10,
                                                       ifelse(n_countries > 5, 11, 12))) ),
        legend.text = element_text(size = 6)) +
  # scale_y_log10() +
  facet_wrap(~country,
             scales = 'free') +
  labs(x = '',
       y = 'Confirmed cases',
       title = paste0('Confirmed cases of COVID-19 in ', this_iso)) 
  ggsave(paste0('/tmp/world/', this_iso, '.png'),
         width = 12, 
         height = 7)
  }



}

Rolling average new events

plot_data <- df_country %>% filter(country %in% c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway')) %>% mutate(geo = country)
roll_curve(plot_data, pop = T)

dir.create('/tmp/countries')
roll_curve_country <- function(the_country = 'Spain'){
  plot_data <- df_country %>% filter(country %in% the_country) %>% mutate(geo = country)
  g1 <- roll_curve(plot_data, pop = F)
  g2 <- roll_curve(plot_data, pop = T)
  g3 <- roll_curve(plot_data, pop = F, deaths = T)
  g4 <- roll_curve(plot_data, pop = T, deaths = T)
  ggsave(paste0('/tmp/countries/', the_country, '1.png'), g1)
  ggsave(paste0('/tmp/countries/', the_country, '2.png'), g2)
  ggsave(paste0('/tmp/countries/', the_country, '3.png'), g3)
  ggsave(paste0('/tmp/countries/', the_country, '4.png'), g4)
}


countries <- c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway', 'US', 'United Kingdom', 'Korea, South',
  'China', 'Japan', 'Switzerland', 'Sweden', 'Denmark', 'Netherlands', 'Iran', 'Canada')
for(i in 1:length(countries)){
  roll_curve_country(the_country = countries[i])
}
Error in ts(x): 'ts' object must have one or more observations
# Cases
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data)

# Cases adjusted
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, pop = T)

# Deaths
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, deaths = T)

# Cases adjusted
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, pop = T, deaths = T)

plot_data <- esp_df  %>% mutate(geo = ccaa)

roll_curve(plot_data, pop = T, deaths = T)

plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)

roll_curve(plot_data, deaths = T)

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths / sum(deaths) * 100)
text_size <- 12

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = deaths)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths | Muertes',
       title = 'COVID-19 deaths in Spain',
       subtitle = paste0('Data as of ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(deaths > 0),
            aes(x = ccaa,
                y = deaths,
                label = paste0(deaths, '\n(',
                               round(p, digits = 1), '%)')),
            size = text_size * 0.3,
            nudge_y = 180) +
  ylim(0, max(pd$deaths * 1.1))
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

ggsave('/tmp/spain.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

Muertes relativas por CCAA

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths / sum(deaths) * 100)

pd <- pd %>% left_join(esp_pop)
text_size <- 12
pd$value <- pd$deaths / pd$pop * 100000

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths per 100,000',
       title = 'COVID-19 deaths per 100.000',
       subtitle = paste0('Data as of ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(value > 0),
            aes(x = ccaa,
                y = value,
                label = paste0(round(value, digits = 2), '\n(',
                               deaths, '\ndeaths)')),
            size = text_size * 0.3,
            nudge_y = 4.5) +
  ylim(0, max(pd$value) * 1.2)
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

ggsave('/tmp/spai2.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

Just yesterday

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100)
text_size <- 12

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = deaths_non_cum)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths',
       title = 'COVID-19 deaths in Spain',
       subtitle = paste0('Data only for ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(deaths_non_cum > 0),
            aes(x = ccaa,
                y = deaths_non_cum,
                label = paste0(deaths_non_cum, '\n(',
                               round(p, digits = 1), '%)')),
            size = text_size * 0.3,
            nudge_y = 30) +
  ylim(0, max(pd$deaths_non_cum * 1.1))
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

ggsave('/tmp/spain_non_cum.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

Muertes relativas por CCAA ayer SOLO

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100)

pd <- pd %>% left_join(esp_pop)
text_size <- 12
pd$value <- pd$deaths_non_cum / pd$pop * 100000

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths per 100,000',
       title = 'COVID-19 deaths per 100.000',
       subtitle = paste0('Data as of ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(value > 0),
            aes(x = ccaa,
                y = value,
                label = paste0(round(value, digits = 2), '\n(',
                               deaths_non_cum, '\ndeaths)')),
            size = text_size * 0.3,
            nudge_y = 1) +
  ylim(0, max(pd$value) * 1.3)
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

ggsave('/tmp/spain_ayer_adj.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, scales = 'fixed')

ggsave('/tmp/a.png',
       width = 1280 / 150,
       height = 720 / 150)

Loop for everywhere (see desktop)

dir.create('/tmp/ccaas')
ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, scales = 'fixed')  + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases.png'),
         width = 1280 / 150,
         height = 720 / 150)
}

ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, scales = 'fixed', pop = TRUE)  + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases_pop.png'),
         width = 1280 / 150,
         height = 720 / 150)
}

# Deaths too
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths.png'),
         width = 1280 / 150,
         height = 720 / 150)
}

# Deaths too
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, deaths = T, scales = 'fixed', pop = TRUE)  + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths_pop.png'),
         width = 1280 / 150,
         height = 720 / 150)
}
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, scales = 'free_y')

ggsave('/tmp/b.png',
       width = 1280 / 150,
       height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, deaths = T, scales = 'free_y')

ggsave('/tmp/c.png',
       width = 1280 / 150,
       height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, deaths = T, scales = 'fixed')

ggsave('/tmp/d.png',
       width = 1280 / 150,
       height = 720 / 150)
plot_data <- df_country %>% filter(country %in% c('Spain', 'Italy', 'Germany', 'France', 'US',
                                                  'China', 'Korea, South', 'Japan', 'Singapore')) %>% mutate(geo = country)
roll_curve(plot_data, scales = 'free_y')

ggsave('/tmp/z.png',
       width = 1280 / 150,
       height = 720 / 150)

World at large

pd <- df_country %>%
  group_by(date) %>%
  summarise(n = sum(cases)) %>%
  filter(date < max(date))
ggplot(data = pd,
       aes(x = date,
           y = n)) +
  geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Cases',
       title = 'COVID-19 cases')

ggsave('~/Videos/update/a.png',
       width = 1280 / 150,
       height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/a.png'

China vs world

pd <- df_country %>%
  group_by(date,
           country = ifelse(country == 'China', 'China', 'Other countries')) %>%
  summarise(n = sum(cases))  %>%
  ungroup %>%
  filter(date < max(date))
Error: Column `country` can't be modified because it's a grouping variable
ggplot(data = pd,
       aes(x = date,
           y = n,
           color = country)) +
  geom_line(size = 2) +
  # geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Cases',
       title = 'COVID-19 cases') +
  scale_color_manual(name = '',
                     values = c('red', 'black')) +
  theme(legend.text = element_text(size = 25),
        legend.position = 'top')
Error in FUN(X[[i]], ...): object 'country' not found

ggsave('~/Videos/update/b.png',
       width = 1280 / 150,
       height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/b.png'

NOn china only

pd <- df_country %>%
  group_by(date,
           country = ifelse(country == 'China', 'China', 'Other countries')) %>%
  summarise(n = sum(cases)) %>%
  filter(country == 'Other countries')  %>%
  ungroup %>%
  filter(date < max(date))
Error: Column `country` can't be modified because it's a grouping variable
ggplot(data = pd,
       aes(x = date,
           y = n)) +
  geom_line(size = 2) +
  # geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Cases',
       title = 'COVID-19 cases, outside of China') 

ggsave('~/Videos/update/c.png',
       width = 1280 / 150,
       height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/c.png'

Case numbers across countries

plot_day_zero(countries = c('France', 'Germany', 'Italy', 'Spain', 'Switzerland', 'Sweden', 'Norway', 'Netherlands'))

# ggsave('~/Videos/update/d.png',
#        width = 1280 / 150,
#        height = 720 / 150)

World at large - deaths

pd <- df_country %>%
  group_by(date) %>%
  summarise(n = sum(deaths)) %>%
  filter(date < max(date))
ggplot(data = pd,
       aes(x = date,
           y = n)) +
  geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Deaths',
       title = 'COVID-19 deaths')

# ggsave('~/Videos/update/e.png',
#        width = 1280 / 150,
#        height = 720 / 150)

China vs world deaths

pd <- df_country %>%
  group_by(date,
           country = ifelse(country == 'China', 'China', 'Other countries')) %>%
  summarise(n = sum(deaths))  %>%
  ungroup %>%
  filter(date < max(date))
Error: Column `country` can't be modified because it's a grouping variable
ggplot(data = pd,
       aes(x = date,
           y = n,
           color = country)) +
  geom_line(size = 2) +
  # geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Deaths',
       title = 'COVID-19 deaths') +
  scale_color_manual(name = '',
                     values = c('red', 'black')) +
  theme(legend.text = element_text(size = 25),
        legend.position = 'top')
Error in FUN(X[[i]], ...): object 'country' not found

# ggsave('~/Videos/update/f.png',
#        width = 1280 / 150,
#        height = 720 / 150)

Asian hope

plot_day_zero(countries = c('Korea, South', 'Japan', 'China', 'Singapore'))

# ggsave('~/Videos/update/g.png',
#        width = 1280 / 150,
#        height = 720 / 150)

Since trajectories are very unstable when cases are low, we’ll exclude from our analysis the first few days, and will only count as “outbreak” once a country reaches 150 or more cumulative cases.

# Doubling time
n_cases_start = 150
countries = c('Italy', 'Spain', 'France', 'Germany', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway')
# countries <- sort(unique(df_country$country))
out_list <- curve_list <-  list()
counter <- 0
for(i in 1:length(countries)){
  message(i)
  this_country <- countries[i]
  sub_data <-df_country %>% filter(country == this_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  ok <- max(sub_data$cases, na.rm = TRUE) >= n_cases_start
  if(ok){
    counter <- counter + 1
    pd <- sub_data %>%
      filter(!is.na(cases)) %>%
      mutate(start_date = min(date[cases >= n_cases_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since))
    fit <- lm(log(cases) ~ days_since, data = pd) 
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    curve <- fit$coef[2]
    
    # Predict days ahead
    fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1))
    fake <- left_join(fake, pd %>% dplyr::select(days_since, cases, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    
    # Doubling time
    dt <- log(2)/fit$coef[2]
    out <- tibble(country = this_country,
                  doubling_time = dt,
                  slope = curve)
    out_list[[counter]] <- out
    curve_list[[counter]] <- fake %>% mutate(country = this_country)
  }
}
done <- bind_rows(out_list)
print(done)
# A tibble: 10 x 3
   country        doubling_time  slope
   <chr>                  <dbl>  <dbl>
 1 Italy                   7.17 0.0966
 2 Spain                   6.04 0.115 
 3 France                  5.99 0.116 
 4 Germany                 6.10 0.114 
 5 Italy                   7.17 0.0966
 6 Switzerland             8.12 0.0854
 7 Denmark                11.1  0.0625
 8 US                      4.37 0.159 
 9 United Kingdom          5.21 0.133 
10 Norway                 12.1  0.0571
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)

# Join doubling time to curves
joined <- left_join(curves, done)

# Get rid of Italy future (since it's the "leader")
joined <- joined %>%
  filter(country != 'Italy' |
           date <= (Sys.Date() -1))


# Make long format
long <- joined %>% 
  dplyr::select(date, days_since, country, cases, predicted, doubling_time) %>%
  tidyr::gather(key, value, cases:predicted) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

The below chart shows the trajectories in terms of number of cases in Europe in red, and the predicted trajectories in black. The black line assumes that the doubling rate will stay constant.

cols <- c('red', 'black')
ggplot(data = long,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = long %>% filter(key != 'Confirmed cases'),
            size = 1.2, alpha = 0.8) +
  geom_point(data = long %>% filter(key == 'Confirmed cases')) +
  geom_line(data = long %>% filter(key == 'Confirmed cases'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >150 cumulative cases',
       y = 'Cases',
       title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15))

Since Italy is “leading the way”, it’s helpful to also compare each country to Italy. Let’s see that.

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = cases) %>%
  dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, cases, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, cases: Italy) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

cols <- c('red', 'blue', 'black')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = ol %>% filter(!key %in% c('Confirmed cases', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Italy')),
            size = 0.8, alpha = 0.8) +
  geom_point(data = ol %>% filter(key == 'Confirmed cases')) +
  geom_line(data = ol %>% filter(key == 'Confirmed cases'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >150 cumulative cases',
       y = 'Cases',
       title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15))

In the above, what’s striking is how many places have trajectories that are worse than Italy’s. Yes, Italy has more cases, but it’s doubling time is less. Either that changes soon, or these other countries will soon have more cases than Italy.

Deaths or cases?

The number of cases is not necessarily the best indicator for the severity of an outbreak of this nature. Why? Because (a) testing rates and protocols are different by place and (b) testing rates are different by time (since health services are changing their approaches as things develop). In other words, when we compare the number of cases by place and time, we are introducing significant bias.

Using deaths to gauge the magnitude of the outbreak is also problematic. Death rates are differential by age, so the number of deaths depends on a country’s population period, or age structure. Also, death rates will be a function of health services, which are not of the same quality every where. And, of course, like cases, we don’t necessarily know about all of the deaths that occur because of COVID-19.

Still, there’s an argument that death rates have less bias than case rates because deaths are easier to identify than cases. Let’s accept that argument, for the time being, and have a look at death rates by country.

# Doubling time
n_deaths_start = 5
countries = c('Italy', 'Spain', 'France', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway', 'Germany')
# countries <- sort(unique(df_country$country))

make_double_time <- function(data = df_country,
                             the_country = 'Spain',
                             n_deaths_start = 5,
                             time_ahead = 7){
   sub_data <-data %>% filter(country == the_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
  if(ok){
    counter <- counter + 1
    pd <- sub_data %>%
      filter(!is.na(deaths)) %>%
      mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since)) %>%
      mutate(the_weight = 1/(1 + (as.numeric(max(date) - date))))
    fit <- lm(log(deaths) ~ days_since,
              weights = the_weight,
              data = pd) 
    # fitlo <- loess(deaths ~ days_since, data = pd)
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    # curve <- fit$coef[2]
    
    # Predict days ahead
    day0 <- pd$date[pd$days_since == 0]
    fake <- tibble(days_since = seq(0, max(pd$days_since) + time_ahead, by = 1))
    fake <- fake %>%mutate(date = seq(day0, day0+max(fake$days_since), by = 1))
    fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    # fake$predictedlo <- predict(fitlo, newdata = fake)
    ci <- exp(predict(fit, newdata = fake, interval = 'prediction'))
    # cilo <- predict(fitlo, newdata = fake, interval = 'prediction')

    fake$lwr <- ci[,'lwr']
    fake$upr <- ci[,'upr']
    # fake$lwrlo <- ci[,'lwr']
    # fake$uprlo <- ci[,'upr']
    # Doubling time
    dt <- log(2)/fit$coef[2]
    fake %>% mutate(country = the_country) %>% mutate(doubling_time = dt)
  }
}

plot_double_time <- function(data, ylog = F){
  the_labs <- labs(x = 'Date',
                   y = 'Deaths',
                   title = paste0('Predicted deaths in ', data$country[1]))
  long <- data %>%
    tidyr::gather(key, value, deaths:predicted) %>%
    mutate(key = Hmisc::capitalize(key))
  g <- ggplot() +
        geom_ribbon(data = data %>% filter(date > max(long$date[!is.na(long$value) & long$key == 'Deaths'])),
                aes(x = date,
                    ymax = upr,
                    ymin = lwr),
                alpha =0.6,
                fill = 'darkorange') +
    geom_line(data = long,
              aes(x = date,
                  y = value,
                  group = key,
                  lty = key)) +
    geom_point(data = long %>% filter(key == 'Deaths'),
               aes(x = date,
                   y = value)) +
    theme_simple() +
    theme(legend.position = 'right',
          legend.title = element_blank()) +
    the_labs
  if(ylog){
    g <- g + scale_y_log10()
  }
  return(g)
}
options(scipen = '999')
data <- make_double_time(n_deaths_start = 150, time_ahead = 7)
data
# A tibble: 52 x 8
   days_since date       country deaths predicted   lwr   upr doubling_time
        <dbl> <date>     <chr>    <dbl>     <dbl> <dbl> <dbl>         <dbl>
 1          0 2020-03-14 Spain      292     1808. 1173. 2786.          11.0
 2          1 2020-03-15 Spain      314     1926. 1260. 2944.          11.0
 3          2 2020-03-16 Spain      496     2051. 1352. 3112.          11.0
 4          3 2020-03-17 Spain      590     2185. 1451. 3290.          11.0
 5          4 2020-03-18 Spain      765     2327. 1558. 3478.          11.0
 6          5 2020-03-19 Spain      993     2479. 1671. 3677.          11.0
 7          6 2020-03-20 Spain     1326     2641. 1793. 3888.          11.0
 8          7 2020-03-21 Spain     1672     2813. 1924. 4112.          11.0
 9          8 2020-03-22 Spain     2136     2996. 2064. 4349.          11.0
10          9 2020-03-23 Spain     2707     3192. 2214. 4601.          11.0
# … with 42 more rows
dir.create('/tmp/ccaa_predictions')

plot_double_time(data, ylog = T) +
  labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)),\nassuming no change in growth trajectory since first day at >150 deaths')

ggsave('/tmp/ccaa_predictions/spain.png')
# All ccaas
ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
  message(i)
  this_ccaa <- ccaas[i]
  sub_data <- esp_df %>% mutate(country = ccaa) 
  try({
    data <- make_double_time(
    data = sub_data,
    the_country = this_ccaa,
    n_deaths_start = 5,
    time_ahead = 7)
  plot_double_time(data, ylog = T) +
  labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)), assuming no change in growth trajectory since first day at >5 deaths')
  ggsave(paste0('/tmp/ccaa_predictions/',
                this_ccaa, '.png'),
         height = 4.9,
         width = 8.5)
  })

}
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
# all_countries <- sort(unique(df_country$country))
# for(i in 1:length(all_countries)){
#   this_country <- all_countries[i]
#   data <- make_double_time(the_country = this_country, n_deaths_start = 5)
#   if(!is.null(data)){
#     # print(this_country)
#     g <- plot_double_time(data, ylog = F) +
#   labs(subtitle = 'Basic log-linear model assuming no change in growth trajectory since first day at >5 deaths')
#     ggsave(paste0('/tmp/', this_country, '.png'), height = 5, width = 8)
#     print(data)
#   }
# }
out_list <- curve_list <-  list()
counter <- 0
for(i in 1:length(countries)){
  message(i)
  this_country <- countries[i]
  sub_data <-df_country %>% filter(country == this_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
  if(ok){
    counter <- counter + 1
    pd <- sub_data %>%
      filter(!is.na(deaths)) %>%
      mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since))
    fit <- lm(log(deaths) ~ days_since, data = pd) 
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    # curve <- fit$coef[2]
    
    # Predict days ahead
    fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1))
    fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    
    # Doubling time
    dt <- log(2)/fit$coef[2]
    out <- tibble(country = this_country,
                  doubling_time = dt)
    out_list[[counter]] <- out
    curve_list[[counter]] <- fake %>% mutate(country = this_country)
  }
}
done <- bind_rows(out_list)
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)

# Join doubling time to curves
joined <- left_join(curves, done)

# Get rid of Italy future (since it's the "leader")
joined <- joined %>%
  filter(country != 'Italy' |
           date <= (Sys.Date() -1))


# Make long format
long <- joined %>% 
  dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>%
  tidyr::gather(key, value, deaths:predicted) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
cols <- c('red', 'black')
sub_data <-  long %>% filter(country != 'US')
ggplot(data = sub_data,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = sub_data %>% filter(key != 'Deaths'),
            size = 1.2, alpha = 0.8) +
  geom_point(data = sub_data %>% filter(key == 'Deaths')) +
  geom_line(data = sub_data %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 cumulative deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15))

Let’s again overlay Italy.

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>%
  dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Italy) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

cols <- c('red', 'blue', 'black')
sub_data <- ol %>% 
  filter(!(key == 'Predicted (based on current doubling time)' &
             country == 'Spain' &
             days_since > 13))
ggplot(data = sub_data,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = sub_data %>% filter(!key %in% c('Deaths', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = sub_data %>% filter(key %in% c('Italy')),
            size = 0.8, alpha = 0.8) +
  geom_point(data = sub_data %>% filter(key == 'Deaths')) +
  geom_line(data = sub_data %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  scale_y_log10() +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15)) 

Let’s look just at Spain

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy',
                         country == 'Spain')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>%
  dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Italy) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', 
                      ifelse(key == 'Deaths', 'Spain', key)))

cols <- c('blue',  'black', 'red')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Italy')),
            size = 0.8, alpha = 0.8) +
  # geom_point(data = ol %>% filter(key == 'Deaths')) +
    geom_point(data = ol %>% filter(country == 'Spain',
                                    key == 'Spain'), size = 4, alpha = 0.6) +

  geom_line(data = ol %>% filter(key == 'Deaths'),
            size = 0.8) +
  # facet_wrap(~paste0(country, '\n',
  #                    '(doubling time: ', 
  #                    round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,1)) +
  scale_color_manual(name = '',
                     values = cols) +
  scale_y_log10() +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15),
          axis.title = element_text(size = 18))

The importance of lag

Things are changing very rapidly. And measures being taken by these countries will have an impact on the outbreak.

But it’s important to remember that there is a lag between when an intervention takes place and when its effect is notable. Because of the incubation period - the number of days between someone getting infected and becoming sick - what we do today won’t really have an effect until next weekend. And the clinical cases that present today are among people who got infected a week ago.

Disease control measures work. We can see that clearly in the case of Hubei, Wuhan, Iran, Japan. And they will work in Europe too. But because many of these measures were implemented very recently, we won’t likely see a major effect for at least a few more days.

In the mean time, it’s important to practice social distancing. Stay away from others to keep both you and others safe. Listen to Health Authorities. Take this very seriously.

Spain and Italy regions

# Madrid vs Lombardy deaths
n_death_start <- 5
pd <- esp_df %>%
  # filter(ccaa == 'Madrid') %>%
  dplyr::select(date, ccaa, cases, deaths) %>%
  bind_rows(ita %>%
              # filter(ccaa == 'Lombardia') %>%
              dplyr::select(date, ccaa, cases, deaths)) %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(first_n_death = min(date[deaths >= n_death_start])) %>%
  ungroup %>%
  mutate(days_since_n_deaths = date - first_n_death) %>%
  filter(is.finite(days_since_n_deaths))

pd$country <- pd$ccaa
pd$cases <- pd$cases
countries <- sort(unique(pd$country))
out_list <- curve_list <-  list()
counter <- 0
for(i in 1:length(countries)){
  message(i)
  this_country <- countries[i]
  sub_data <- pd %>% filter(country == this_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  # ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
  ok <- length(which(sub_data$deaths >= n_deaths_start))
  if(ok){
    counter <- counter + 1
    sub_pd <- sub_data %>%
      filter(!is.na(deaths)) %>%
      mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since))
    fit <- lm(log(deaths) ~ days_since, data = sub_pd) 
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    # curve <- fit$coef[2]
    
    # Predict days ahead
    fake <- tibble(days_since = seq(0, max(sub_pd$days_since) + 5, by = 1))
    fake <- left_join(fake, sub_pd %>% dplyr::select(days_since, deaths, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    
    # Doubling time
    dt <- log(2)/fit$coef[2]
    out <- tibble(country = this_country,
                  doubling_time = dt)
    out_list[[counter]] <- out
    curve_list[[counter]] <- fake %>% mutate(country = this_country)
  }
}
done <- bind_rows(out_list)
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)

# Join doubling time to curves
joined <- left_join(curves, done)


# Make long format
long <- joined %>% 
  dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>%
  tidyr::gather(key, value, deaths:predicted) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

# Remove those with not enough data to have a doubling time yet
long <- long %>% filter(!is.na(doubling_time))
text_size <- 12

cols <- c('red', 'black')
ggplot(data = long,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = long %>% filter(key != 'Deaths'),
            size = 1.2, alpha = 0.8) +
  geom_point(data = long %>% filter(key == 'Deaths')) +
  geom_line(data = long %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_y_log10() +
  scale_linetype_manual(name ='',
                        values = c(1,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >150 cumulative cases',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = text_size * 0.5),
          plot.title = element_text(size = 15))

Let’s overlay Lombardy

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Lombardia')
ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>%
  dplyr::select(Lombardia, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

# Remove those with not enough data to have a doubling time yet
ol <- ol %>% filter(!is.na(doubling_time))

cols <- c('red', 'blue', 'black')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  scale_y_log10() +
  geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Lombardia')),
            size = 0.5, alpha = 0.8) +
  geom_point(data = ol %>% filter(key == 'Deaths')) +
  geom_line(data = ol %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = text_size * 0.5),
          plot.title = element_text(size = 15))

Show only Spanish regions vs. Lombardy

text_size <- 14

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Lombardia')
ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>%
  dplyr::select(Lombardia, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

# Remove those with not enough data to have a doubling time yet
ol <- ol %>% filter(!is.na(doubling_time))

# Only Spain
ol <- ol %>% filter(country %in% esp_df$ccaa) %>%
  filter(!country %in% 'Aragón')

cols <- c('red', 'blue', 'black')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  scale_y_log10() +
  geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Lombardia')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Lombardia')),
            size = 0.5, alpha = 0.8) +
  geom_point(data = ol %>% filter(key == 'Deaths')) +
  geom_line(data = ol %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = text_size * 0.6),
          plot.title = element_text(size = 15))

Same plot but overlayed

Same as above, but overlaid

text_size <-10

# cols <- c('red', 'black')
long <- long %>% filter(country %in% c('Lombardia',
                                       'Emilia Romagna') |
                          country %in% esp_df$ccaa) %>%
  filter(country != 'Aragón')
places <- sort(unique(long$country))

cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 7, 'Spectral'))(length(places))
cols[which(places == 'Madrid')] <- 'red'
cols[which(places == 'Cataluña')] <- 'purple'
cols[which(places == 'Lombardia')] <- 'darkorange'
cols[which(places == 'Emilia Romagna')] <- 'darkgreen'

long$key <- ifelse(long$key != 'Deaths', 'Predicted', long$key)
long$key <- ifelse(long$key == 'Predicted', 'Muertes\nprevistas',
                   'Muertes\nobservadas')


# Keep only Madrid, Lombardy, Emilia Romagna
long <- long %>%
  filter(country %in% c('Madrid',
                        'Lombardia',
                        'Emilia Romagna'))

ggplot(data = long,
       aes(x = days_since,
           y = value,
           lty = key,
           color = country)) +
  geom_point(data = long %>% filter(key == 'Muertes\nobservadas'), size = 2, alpha = 0.8) +
  geom_line(data = long %>% filter(key == 'Muertes\nprevistas'), size = 1, alpha = 0.7) +
  geom_line(data = long %>% filter(key != 'Muertes\nprevistas'), size = 0.8) +
  theme_simple() +
  scale_y_log10() +
  scale_linetype_manual(name ='',
                        values = c(1,4)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  # labs(x = 'Days since first day at 5 or more cumulative deaths',
  #      y = 'Deaths',
  #      title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
  #      caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
  #      subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    labs(x = 'Días desde el primer día a 5 o más muertes acumuladas',
       y = 'Muertes (escala logarítmica)',
       title = 'Muertes por COVID-19',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Tasa de crecimiento calculada desde el primer día a 5 o más muertes acumuladas)\n(Muertes "previstas": suponiendo que no hay cambios en la tasa de crecimiento)') +
    theme(strip.text = element_text(size = text_size * 0.75),
          plot.title = element_text(size = text_size * 3),
          legend.text = element_text(size = text_size * 1.5),
          axis.title = element_text(size = text_size * 2),
          axis.text = element_text(size = text_size * 2))

# cols <- c(cols, 'darkorange')
# ggplot(data = ol,
#        aes(x = days_since,
#            y = value,
#            lty = key,
#            color = key)) +
#   scale_y_log10() +
#   geom_line(aes(color = country)) +
#   
#   # geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
#   #           size = 1.2, alpha = 0.8) +
#   #   geom_line(data = ol %>% filter(key %in% c('Lombardia')),
#   #           size = 0.5, alpha = 0.8) +
#   # geom_point(data = ol %>% filter(key == 'Deaths')) +
#   # geom_line(data = ol %>% filter(key == 'Deaths'),
#   #           size = 0.8) +
#   theme_simple() +
#   scale_linetype_manual(name ='',
#                         values = c(1,6,2)) +
#   scale_color_manual(name = '',
#                      values = cols) +
#   theme(legend.position = 'top') +
#   labs(x = 'Days since first day at >5 deaths',
#        y = 'Deaths',
#        title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
#        caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
#        subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
#     theme(strip.text = element_text(size = text_size * 1),
#           plot.title = element_text(size = 15))
# Map data preparation

if(!'map.RData' %in% dir()){
  esp1 <- getData(name = 'GADM', country = 'ESP', level = 1)
# Remove canary
esp1 <- esp1[esp1@data$NAME_1 != 'Islas Canarias',]
espf <- fortify(esp1, region = 'NAME_1')

# Standardize names
# Convert names
map_names <- esp1@data$NAME_1
data_names <- sort(unique(esp_df$ccaa))
names_df <- tibble(NAME_1 = c('Andalucía',
 'Aragón',
 'Cantabria',
 'Castilla-La Mancha',
 'Castilla y León',
 'Cataluña',
 'Ceuta y Melilla',
 'Comunidad de Madrid',
 'Comunidad Foral de Navarra',
 'Comunidad Valenciana',
 'Extremadura',
 'Galicia',
 'Islas Baleares',
 'La Rioja',
 'País Vasco',
 'Principado de Asturias',
 'Región de Murcia'),
 ccaa = c('Andalucía',
 'Aragón',
 'Cantabria',
 'CLM',
 'CyL',
 'Cataluña',
 'Melilla',
 'Madrid',
 'Navarra',
 'C. Valenciana',
 'Extremadura',
 'Galicia',
 'Baleares',
 'La Rioja',
 'País Vasco',
 'Asturias',
 'Murcia'))


espf <- left_join(espf %>% dplyr::rename(NAME_1 = id), names_df)
centroids <- data.frame(coordinates(esp1))
names(centroids) <- c('long', 'lat')
centroids$NAME_1 <- esp1$NAME_1
centroids <- centroids %>% left_join(names_df)

# Get random sampling points

  random_list <- list()
  for(i in 1:nrow(esp1)){
    message(i)
    shp <- esp1[i,]
    # bb <- bbox(shp)
    this_ccaa <- esp1@data$NAME_1[i]
    # xs <- runif(n = 500, min = bb[1,1], max = bb[1,2])
    # ys <- runif(n = 500, min = bb[2,1], max = bb[2,2])
    # random_points <- expand.grid(long = xs, lat = ys) %>%
    #   mutate(x = long,
    #          y = lat)
    # coordinates(random_points) <- ~x+y
    # proj4string(random_points) <- proj4string(shp)
    # get ccaa
    message('getting locations of randomly generated points')
    # polys <- over(random_points,polygons(shp))
    # polys <- as.numeric(polys)
    random_points <- spsample(shp, n = 20000, type = 'random')
    random_points <- data.frame(random_points)
    random_points$NAME_1 <-  this_ccaa
    random_points <- left_join(random_points, names_df) %>% dplyr::select(-NAME_1)
    random_list[[i]] <- random_points
  }
  random_points <- bind_rows(random_list)
  random_points <- random_points %>% mutate(long = x,
                                            lat = y)

save(espf,
     esp1,
     names_df,
     centroids,
     random_points,
     file = 'map.RData')
} else {
  load('map.RData')
}

# Define a function for adding zerio
add_zero <- 
  function (x, n) 
  {
    x <- as.character(x)
    adders <- n - nchar(x)
    adders <- ifelse(adders < 0, 0, adders)
    for (i in 1:length(x)) {
      if (!is.na(x[i])) {
        x[i] <- paste0(paste0(rep("0", adders[i]), collapse = ""), 
                       x[i], collapse = "")
      }
    }
    return(x)
  }
remake_world_map <- FALSE
options(scipen = '999')
if(remake_world_map){
  # World map animation
  world <- map_data('world')
  # world <- ne_countries(scale = "medium", returnclass = "sf")
  
  # Get plotting data
  pd <- df_country %>%
    dplyr::select(date, lng, lat, n = cases)
  dates <- sort(unique(pd$date))
  n_days <- length(dates)
  # # Define vectors for projection
  # vec_lon <- seq(30, -20, length = n_days)
  # vec_lat <- seq(25, 15, length = n_days)
  
  dir.create('animation')
  for(i in 1:n_days){
    message(i, ' of ', n_days)
    this_date <- dates[i]
    # this_lon <- vec_lon[i]
    # this_lat <- vec_lat[i]
    # the_crs <-
    #   paste0("+proj=laea +lat_0=", this_lat,
    #          " +lon_0=",
    #          this_lon,
    #          " +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs ")
    sub_data <- pd %>%
      filter(date == this_date)
    # coordinates(sub_data) <- ~lng+lat
    # proj4string(sub_data) <- proj4string(esp1)
    # # sub_data <- spTransform(sub_data,
    # #                         the_crs)
    # coordy <- coordinates(sub_data)
    # sub_data@data$long <- coordy[,1]
    # sub_data@data$lat <- coordy[,2]
  
    g <- ggplot() +
      geom_polygon(data = world,
                   aes(x = long,
                       y = lat,
                       group = group),
                   fill = 'black',
                   color = 'white',
                   size = 0.1) +
      theme_map() +
          geom_point(data = sub_data %>% filter(n > 0) %>% mutate(Deaths = n),
                 aes(x = lng,
                     y = lat,
                     size = Deaths),
                 color = 'red',
                 alpha = 0.6) +
      geom_point(data = tibble(x = c(0,0), y = c(0,0), Deaths = c(1, 100000)),
                 aes(x = x,
                     y = y,
                     size = Deaths),
                 color = 'red',
                 alpha =0.001) +
      scale_size_area(name = '', breaks = c(100, 1000, 10000, 100000),
                      max_size = 25
                      ) +
    # scale_size_area(name = '', limits = c(1, 10), breaks = c(0, 10, 30, 50, 70, 100, 200, 500)) +
      labs(title = this_date) +
      theme(plot.title = element_text(size = 30),
            legend.text = element_text(size = 15),
            legend.position = 'left')
  
    plot_number <- add_zero(i, 3)
    ggsave(filename = paste0('animation/', plot_number, '.png'),
           plot = g,
           width = 9.5,
           height = 5.1)
  }
  setwd('animation')
  system('convert -delay 30x100 -loop 0 *.png result.gif')
  setwd('..')

}

Maps of Spain

make_map <- function(var = 'deaths',
                     data = NULL,
                     pop = FALSE,
                     pop_factor = 100000,
                     points = FALSE,
                     line_color = 'white',
                     add_names = T,
                     add_values = T,
                     text_size = 2.7){
  
  if(is.null(data)){
    data <- esp_df %>%  mutate(ccaa = cat_transform(ccaa))

  }

  left <- espf %>%   mutate(ccaa = cat_transform(ccaa)) 
  right <- data[,c('ccaa', paste0(var, '_non_cum'))]
  

  names(right)[ncol(right)] <- 'var'
  right <- right %>% group_by(ccaa) %>% summarise(var = sum(var, na.rm = T))
  
  if(pop){
    right <- left_join(right, esp_pop)
    right$var <- right$var / right$pop * pop_factor
  }
  map <- left_join(left, right)
  
  if(points){
    the_points <- centroids %>%
      left_join(right)
    g <- ggplot() +
      geom_polygon(data = map,
         aes(x = long,
             y = lat,
             group = group),
         fill = 'black',
         color = line_color,
         lwd = 0.4, alpha = 0.8) +
      geom_point(data = the_points,
                 aes(x = long,
                     y = lat,
                     size = var),
                 color = 'red',
                 alpha = 0.7) +
      scale_size_area(name = '', max_size = 20)
  } else {
    # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
    cols <- RColorBrewer::brewer.pal(n = 8, name = 'Blues')
    g <- ggplot(data = map,
         aes(x = long,
             y = lat,
             group = group)) +
    geom_polygon(aes(fill = var),
                 lwd = 0.3,
                 color = line_color) +
      scale_fill_gradientn(name = '',
                           colours = cols)
    # scale_fill_viridis(name = '' ,option = 'magma',
    #                    direction = -1) 
  }
  
  # Add names?
  if(add_names){
    centy <- centroids %>% left_join(right)
    if(add_values){
      centy$label <- paste0(centy$ccaa, '\n(', round(centy$var, digits = 2), ')')
    } else {
      centy$label <- centy$ccaa
    }

    g <- g +
      geom_text(data = centy,
                aes(x = long,
                    y = lat,
                    label = label,
                    group = ccaa),
                alpha = 0.7,
                size = text_size)
  }
  
  g +
    theme_map() +
    labs(subtitle = paste0('Data as of ', max(data$date))) +
    theme(legend.position = 'right')
  
}

make_dot_map <- function(var = 'deaths',
                     date = NULL,
                     pop = FALSE,
                     pop_factor = 100,
                     point_factor = 1,
                     points = FALSE,
                     point_color = 'darkred',
                     point_size = 0.6,
                     point_alpha = 0.5){
  
  
  if(is.null(date)){
    the_date <- max(esp_df$date)
  } else {
    the_date <- date
  }
    right <- esp_df[esp_df$date == the_date,c('ccaa', var)]
   names(right)[ncol(right)] <- 'var'
  if(pop){
    right <- left_join(right, esp_pop)
    right$var <- right$var / right$pop * pop_factor
  }
  map_data <- esp1@data %>%
    left_join(names_df) %>%
      left_join(right)
  map_data$var <- map_data$var / point_factor
  out_list <- list()
  for(i in 1:nrow(map_data)){
    sub_data <- map_data[i,]
    this_value = round(sub_data$var)

    if(this_value >= 1){
      this_ccaa = sub_data$ccaa
      # get some points
      sub_points <- random_points %>% filter(ccaa == this_ccaa)
      sampled_points <- sub_points %>% dplyr::sample_n(this_value)
      out_list[[i]] <- sampled_points
    }
  }
  the_points <- bind_rows(out_list)
  
  g <- ggplot() +
    geom_polygon(data = espf,
         aes(x = long,
             y = lat,
             group = group),
         fill = 'white',
         color = 'black',
         lwd = 0.4, alpha = 0.8) +
    geom_point(data = the_points,
               aes(x = long,
                   y = lat),
               color = point_color,
               size = point_size,
               alpha = point_alpha)
  g +
    theme_map() +
    labs(subtitle = paste0('Data as of ', max(esp_df$date)))
  
}

Deaths

Absolute number of deaths: points

make_map(var = 'deaths',
       points = T) +
  labs(title = 'Number of deaths',
       caption = '@joethebrew')

Absolute number of deaths: choropleth

make_map(var = 'deaths',
         line_color = 'darkgrey',
       points = F) +
  labs(title = 'Number of deaths',
       caption = '@joethebrew')

Number of deaths adjusted by population: points

make_map(var = 'deaths', pop = TRUE, points = T) +
  labs(title = 'Number of deaths per 100,000',
       caption = '@joethebrew')

Number of deaths adjusted by population: polygons

make_map(var = 'deaths', pop = TRUE, points = F, line_color = 'darkgrey') +
  labs(title = 'Number of deaths per 100,000',
       caption = '@joethebrew')

Number of deaths: 1 dot per death

make_dot_map(var = 'deaths', point_size = 0.05) +
  labs(title = 'COVID-19 deaths: 1 point = 1 death\nImportant: points are random within each CCAA; do not reflect exact location',
       caption = '@joethebrew')
Error in if (this_value >= 1) {: missing value where TRUE/FALSE needed

Cases

Absolute number of cases: points

make_map(var = 'cases',
       points = T) +
  labs(title = 'Number of confirmed cases',
       caption = '@joethebrew')

Absolute number of cases: choropleth

make_map(var = 'cases',
         line_color = 'darkgrey',
       points = F) +
  labs(title = 'Number of confirmed cases',
       caption = '@joethebrew')

Number of cases adjusted by population: points

make_map(var = 'cases', pop = TRUE, points = T) +
  labs(title = 'Number of confirmed cases per 100,000',
       caption = '@joethebrew')

Number of cases adjusted by population: polygons

make_map(var = 'cases', pop = TRUE, points = F,
         line_color = 'darkgrey') +
  labs(title = 'Number of confirmed cases per 100,000',
       caption = '@joethebrew')

Number of cases: points

make_dot_map(var = 'cases',
             point_size = 0.05, point_alpha = 0.5, point_factor = 10) +
  labs(title = 'COVID-19 cases: 1 point = 10 cases\nImportant: points are random within each CCAA; do not reflect exact location',
       caption = '@joethebrew')
Error in if (this_value >= 1) {: missing value where TRUE/FALSE needed